Skip to content

Commit 23b9c61

Browse files
authored
Release 2.0.0 oneapi (#5)
* initial, not ready for work * fixes for obj functions * fix some compilation problems * fix some errors * fixes * improve context * plugin compiled and sometimes works * fix the errors. tests passed * fix compilation error without oneapi * black * README update --------- Co-authored-by: Dmitry Razdoburdin <>
1 parent 66ee89d commit 23b9c61

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+5233
-417
lines changed

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,10 @@ if (USE_CUDA)
170170
find_package(CUDAToolkit REQUIRED)
171171
endif (USE_CUDA)
172172

173+
if (PLUGIN_UPDATER_ONEAPI)
174+
target_compile_definitions(xgboost PRIVATE -DXGBOOST_USE_ONEAPI=1)
175+
endif (PLUGIN_UPDATER_ONEAPI)
176+
173177
if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
174178
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
175179
(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))

include/xgboost/context.h

Lines changed: 92 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,29 @@ struct CUDAContext;
2222
struct DeviceSym {
2323
static auto constexpr CPU() { return "cpu"; }
2424
static auto constexpr CUDA() { return "cuda"; }
25+
static auto constexpr SYCL_default() { return "sycl"; }
26+
static auto constexpr SYCL_CPU() { return "sycl:cpu"; }
27+
static auto constexpr SYCL_GPU() { return "sycl:gpu"; }
2528
};
2629

2730
/**
2831
* @brief A type for device ordinal. The type is packed into 32-bit for efficient use in
2932
* viewing types like `linalg::TensorView`.
3033
*/
34+
constexpr static bst_d_ordinal_t kDefaultOrdinal = -1;
3135
struct DeviceOrd {
32-
enum Type : std::int16_t { kCPU = 0, kCUDA = 1 } device{kCPU};
33-
// CUDA device ordinal.
34-
bst_d_ordinal_t ordinal{-1};
36+
enum Type : std::int16_t { kCPU = 0, kCUDA = 1, kSyclDefault = 2, kSyclCPU = 3, kSyclGPU = 4} device{kCPU};
37+
// CUDA or Sycl device ordinal.
38+
bst_d_ordinal_t ordinal{kDefaultOrdinal};
3539

3640
[[nodiscard]] bool IsCUDA() const { return device == kCUDA; }
3741
[[nodiscard]] bool IsCPU() const { return device == kCPU; }
42+
[[nodiscard]] bool IsSyclDefault() const { return device == kSyclDefault; }
43+
[[nodiscard]] bool IsSyclCPU() const { return device == kSyclCPU; }
44+
[[nodiscard]] bool IsSyclGPU() const { return device == kSyclGPU; }
45+
[[nodiscard]] bool IsSycl() const { return (IsSyclDefault() ||
46+
IsSyclCPU() ||
47+
IsSyclGPU()); }
3848

3949
DeviceOrd() = default;
4050
constexpr DeviceOrd(Type type, bst_d_ordinal_t ord) : device{type}, ordinal{ord} {}
@@ -47,14 +57,35 @@ struct DeviceOrd {
4757
/**
4858
* @brief Constructor for CPU.
4959
*/
50-
[[nodiscard]] constexpr static auto CPU() { return DeviceOrd{kCPU, -1}; }
60+
[[nodiscard]] constexpr static auto CPU() { return DeviceOrd{kCPU, kDefaultOrdinal}; }
5161
/**
5262
* @brief Constructor for CUDA device.
5363
*
5464
* @param ordinal CUDA device ordinal.
5565
*/
5666
[[nodiscard]] static auto CUDA(bst_d_ordinal_t ordinal) { return DeviceOrd{kCUDA, ordinal}; }
5767

68+
/**
69+
* @brief Constructor for SYCL.
70+
*
71+
* @param ordinal SYCL device ordinal.
72+
*/
73+
[[nodiscard]] constexpr static auto SYCL_default(bst_d_ordinal_t ordinal = kDefaultOrdinal) { return DeviceOrd{kSyclDefault, ordinal}; }
74+
75+
/**
76+
* @brief Constructor for SYCL CPU.
77+
*
78+
* @param ordinal SYCL CPU device ordinal.
79+
*/
80+
[[nodiscard]] constexpr static auto SYCL_CPU(bst_d_ordinal_t ordinal = kDefaultOrdinal) { return DeviceOrd{kSyclCPU, ordinal}; }
81+
82+
/**
83+
* @brief Constructor for SYCL GPU.
84+
*
85+
* @param ordinal SYCL GPU device ordinal.
86+
*/
87+
[[nodiscard]] constexpr static auto SYCL_GPU(bst_d_ordinal_t ordinal = kDefaultOrdinal) { return DeviceOrd{kSyclGPU, ordinal}; }
88+
5889
[[nodiscard]] bool operator==(DeviceOrd const& that) const {
5990
return device == that.device && ordinal == that.ordinal;
6091
}
@@ -68,6 +99,12 @@ struct DeviceOrd {
6899
return DeviceSym::CPU();
69100
case DeviceOrd::kCUDA:
70101
return DeviceSym::CUDA() + (':' + std::to_string(ordinal));
102+
case DeviceOrd::kSyclDefault:
103+
return DeviceSym::SYCL_default() + (':' + std::to_string(ordinal));
104+
case DeviceOrd::kSyclCPU:
105+
return DeviceSym::SYCL_CPU() + (':' + std::to_string(ordinal));
106+
case DeviceOrd::kSyclGPU:
107+
return DeviceSym::SYCL_GPU() + (':' + std::to_string(ordinal));
71108
default: {
72109
LOG(FATAL) << "Unknown device.";
73110
return "";
@@ -135,6 +172,25 @@ struct Context : public XGBoostParameter<Context> {
135172
* @brief Is XGBoost running on a CUDA device?
136173
*/
137174
[[nodiscard]] bool IsCUDA() const { return Device().IsCUDA(); }
175+
/**
176+
* @brief Is XGBoost running on the default SYCL device?
177+
*/
178+
[[nodiscard]] bool IsSyclDefault() const { return Device().IsSyclDefault(); }
179+
/**
180+
* @brief Is XGBoost running on a SYCL CPU?
181+
*/
182+
[[nodiscard]] bool IsSyclCPU() const { return Device().IsSyclCPU(); }
183+
/**
184+
* @brief Is XGBoost running on a SYCL GPU?
185+
*/
186+
[[nodiscard]] bool IsSyclGPU() const { return Device().IsSyclGPU(); }
187+
/**
188+
* @brief Is XGBoost running on any SYCL device?
189+
*/
190+
[[nodiscard]] bool IsSycl() const { return IsSyclDefault()
191+
|| IsSyclCPU()
192+
|| IsSyclGPU(); }
193+
138194
/**
139195
* @brief Get the current device and ordinal.
140196
*/
@@ -171,6 +227,29 @@ struct Context : public XGBoostParameter<Context> {
171227
/**
172228
* @brief Call function based on the current device.
173229
*/
230+
template <typename CPUFn, typename CUDAFn, typename SYCLFn>
231+
decltype(auto) DispatchDevice(CPUFn&& cpu_fn, CUDAFn&& cuda_fn, SYCLFn&& sycl_fn) const {
232+
static_assert(std::is_same_v<std::invoke_result_t<CPUFn>, std::invoke_result_t<CUDAFn>>);
233+
switch (this->Device().device) {
234+
case DeviceOrd::kCPU:
235+
return cpu_fn();
236+
case DeviceOrd::kCUDA:
237+
return cuda_fn();
238+
case DeviceOrd::kSyclDefault:
239+
return sycl_fn();
240+
case DeviceOrd::kSyclCPU:
241+
return sycl_fn();
242+
case DeviceOrd::kSyclGPU:
243+
return sycl_fn();
244+
default:
245+
// Do not use the device name as this is likely an internal error, the name
246+
// wouldn't be valid.
247+
LOG(FATAL) << "Unknown device type:"
248+
<< static_cast<std::underlying_type_t<DeviceOrd::Type>>(this->Device().device);
249+
break;
250+
}
251+
return std::invoke_result_t<CPUFn>();
252+
}
174253
template <typename CPUFn, typename CUDAFn>
175254
decltype(auto) DispatchDevice(CPUFn&& cpu_fn, CUDAFn&& cuda_fn) const {
176255
static_assert(std::is_same_v<std::invoke_result_t<CPUFn>, std::invoke_result_t<CUDAFn>>);
@@ -179,6 +258,12 @@ struct Context : public XGBoostParameter<Context> {
179258
return cpu_fn();
180259
case DeviceOrd::kCUDA:
181260
return cuda_fn();
261+
case DeviceOrd::kSyclDefault:
262+
LOG(FATAL) << "The requested feature is not implemented for sycl yet";
263+
case DeviceOrd::kSyclCPU:
264+
LOG(FATAL) << "The requested feature is not implemented for sycl yet";
265+
case DeviceOrd::kSyclGPU:
266+
LOG(FATAL) << "The requested feature is not implemented for sycl yet";
182267
default:
183268
// Do not use the device name as this is likely an internal error, the name
184269
// wouldn't be valid.
@@ -213,7 +298,9 @@ struct Context : public XGBoostParameter<Context> {
213298
void SetDeviceOrdinal(Args const& kwargs);
214299
Context& SetDevice(DeviceOrd d) {
215300
this->device_ = d;
216-
this->gpu_id = d.ordinal; // this can be removed once we move away from `gpu_id`.
301+
if (d.IsCUDA()) {
302+
this->gpu_id = d.ordinal; // this can be removed once we move away from `gpu_id`.
303+
}
217304
this->device = d.Name();
218305
return *this;
219306
}

include/xgboost/linalg.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -596,13 +596,13 @@ auto MakeTensorView(Context const *ctx, common::Span<T> data, S &&...shape) {
596596

597597
template <typename T, typename... S>
598598
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> *data, S &&...shape) {
599-
auto span = ctx->IsCPU() ? data->HostSpan() : data->DeviceSpan();
599+
auto span = ctx->IsCUDA() ? data->DeviceSpan() : data->HostSpan();
600600
return MakeTensorView(ctx->gpu_id, span, std::forward<S>(shape)...);
601601
}
602602

603603
template <typename T, typename... S>
604604
auto MakeTensorView(Context const *ctx, HostDeviceVector<T> const *data, S &&...shape) {
605-
auto span = ctx->IsCPU() ? data->ConstHostSpan() : data->ConstDeviceSpan();
605+
auto span = ctx->IsCUDA() ? data->ConstDeviceSpan() : data->ConstHostSpan();
606606
return MakeTensorView(ctx->gpu_id, span, std::forward<S>(shape)...);
607607
}
608608

plugin/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@ endif (PLUGIN_DENSE_PARSER)
44

55
if (PLUGIN_UPDATER_ONEAPI)
66
add_library(oneapi_plugin OBJECT
7+
${xgboost_SOURCE_DIR}/plugin/updater_oneapi/hist_util_oneapi.cc
78
${xgboost_SOURCE_DIR}/plugin/updater_oneapi/regression_obj_oneapi.cc
9+
${xgboost_SOURCE_DIR}/plugin/updater_oneapi/multiclass_obj_oneapi.cc
10+
${xgboost_SOURCE_DIR}/plugin/updater_oneapi/updater_quantile_hist_oneapi.cc
11+
${xgboost_SOURCE_DIR}/plugin/updater_oneapi/device_manager_oneapi.cc
812
${xgboost_SOURCE_DIR}/plugin/updater_oneapi/predictor_oneapi.cc)
913
target_include_directories(oneapi_plugin
1014
PRIVATE

plugin/updater_oneapi/README.md

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,20 @@
22
This plugin adds support of OneAPI programming model for tree construction and prediction algorithms to XGBoost.
33

44
## Usage
5-
Specify the 'objective' parameter as one of the following options to offload computation of objective function on OneAPI device.
5+
Specify the 'device' parameter as one of the following options to offload model training and inference on OneAPI device.
66

77
### Algorithms
8-
| objective | Description |
8+
| device | Description |
99
| --- | --- |
10-
reg:squarederror_oneapi | regression with squared loss |
11-
reg:squaredlogerror_oneapi | regression with root mean squared logarithmic loss |
12-
reg:logistic_oneapi | logistic regression for probability regression task |
13-
binary:logistic_oneapi | logistic regression for binary classification task |
14-
binary:logitraw_oneapi | logistic regression for classification, output score before logistic transformation |
15-
16-
Specify the 'predictor' parameter as one of the following options to offload prediction stage on OneAPI device.
17-
18-
### Algorithms
19-
| predictor | Description |
20-
| --- | --- |
21-
predictor_oneapi | prediction using OneAPI device |
22-
23-
Please note that parameter names are not finalized and can be changed during further integration of OneAPI support.
10+
sycl | use default sycl device |
11+
sycl:gpu | use default sycl gpu |
12+
sycl:cpu | use default sycl cpu |
13+
sycl:gpu:N | use sycl gpu number N |
14+
sycl:cpu:N | use sycl cpu number N |
2415

2516
Python example:
2617
```python
27-
param['predictor'] = 'predictor_oneapi'
28-
param['objective'] = 'reg:squarederror_oneapi'
18+
param['device'] = 'sycl:gpu:0'
2919
```
3020

3121
## Dependencies

0 commit comments

Comments
 (0)