
Commit fed039c

Merge pull request #1 from razdoburdin/release_1.7.3_oneapi
Oneapi plugin for xgboost 1.7.3
2 parents 76bdca0 + ffb7996, commit fed039c

23 files changed: +4469 −268 lines

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
```diff
@@ -158,6 +158,10 @@ if (USE_CUDA)
   endif ()
 endif (USE_CUDA)

+if (PLUGIN_UPDATER_ONEAPI)
+  target_compile_definitions(xgboost PRIVATE -DXGBOOST_USE_ONEAPI=1)
+endif (PLUGIN_UPDATER_ONEAPI)
+
 if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
     ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
      (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))
```
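For context, a compile definition like `XGBOOST_USE_ONEAPI` is normally consumed through a preprocessor guard. A minimal, hypothetical sketch (not part of this commit; the helper name is illustrative only) of how oneAPI-only code can be fenced off behind it:

```cpp
// Hypothetical illustration: OneAPIAvailable() is not an XGBoost function; it
// only shows how code can branch on the definition added above.
#if defined(XGBOOST_USE_ONEAPI)
#include <CL/sycl.hpp>

inline bool OneAPIAvailable() {
  // True if the SYCL runtime reports at least one usable device.
  return !sycl::device::get_devices().empty();
}
#else
inline bool OneAPIAvailable() { return false; }
#endif
```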

include/xgboost/generic_parameters.h

Lines changed: 9 additions & 0 deletions
```diff
@@ -21,6 +21,7 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
   // Constant representing the device ID of CPU.
   static int32_t constexpr kCpuId = -1;
   static int64_t constexpr kDefaultSeed = 0;
+  static int32_t constexpr kDefaultId = -1;

  public:
   GenericParameter();
@@ -38,6 +39,9 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
   bool fail_on_invalid_gpu_id {false};
   bool validate_parameters {false};

+  // primary oneAPI device, -1 means default device
+  int device_id;
+
   /*!
    * \brief Configure the parameter `gpu_id'.
    *
@@ -73,6 +77,11 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
     DMLC_DECLARE_FIELD(validate_parameters)
         .set_default(false)
         .describe("Enable checking whether parameters are used or not.");
+
+    DMLC_DECLARE_FIELD(device_id)
+        .set_default(kDefaultId)
+        .set_lower_bound(-1)
+        .describe("The primary oneAPI device ordinal.");
   }
 };
```
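The new `device_id` field defaults to `kDefaultId` (-1), meaning "let the runtime pick a device". As a rough illustration of how such an ordinal could be resolved into a SYCL queue, here is a minimal sketch; the helper name `QueueFromDeviceId` and the device enumeration order are assumptions for this example, not the plugin's actual logic:

```cpp
#include <vector>
#include <CL/sycl.hpp>

// Hypothetical helper, not code from this commit: map an XGBoost-style
// device_id onto a sycl::queue. -1 selects the runtime's default device;
// any other value indexes the list of devices visible to the runtime.
inline sycl::queue QueueFromDeviceId(int device_id) {
  if (device_id < 0) {
    return sycl::queue{};  // default-constructed queue uses the default device
  }
  std::vector<sycl::device> devices = sycl::device::get_devices();
  return sycl::queue{devices.at(static_cast<size_t>(device_id))};
}
```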

plugin/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
```diff
@@ -4,7 +4,10 @@ endif (PLUGIN_DENSE_PARSER)

 if (PLUGIN_UPDATER_ONEAPI)
   add_library(oneapi_plugin OBJECT
+    ${xgboost_SOURCE_DIR}/plugin/updater_oneapi/hist_util_oneapi.cc
     ${xgboost_SOURCE_DIR}/plugin/updater_oneapi/regression_obj_oneapi.cc
+    ${xgboost_SOURCE_DIR}/plugin/updater_oneapi/multiclass_obj_oneapi.cc
+    ${xgboost_SOURCE_DIR}/plugin/updater_oneapi/updater_quantile_hist_oneapi.cc
     ${xgboost_SOURCE_DIR}/plugin/updater_oneapi/predictor_oneapi.cc)
   target_include_directories(oneapi_plugin
     PRIVATE
```

plugin/updater_oneapi/README.md

Lines changed: 7 additions & 0 deletions
````diff
@@ -2,6 +2,12 @@
 This plugin adds support of OneAPI programming model for tree construction and prediction algorithms to XGBoost.

 ## Usage
+Specify the 'updater' parameter as one of the following options to offload model training on OneAPI device.
+
+### Algorithms
+| updater | Description |
+| --- | --- |
+| grow_quantile_histmaker_oneapi | model training using OneAPI device |
 Specify the 'objective' parameter as one of the following options to offload computation of objective function on OneAPI device.

 ### Algorithms
@@ -24,6 +30,7 @@ Please note that parameter names are not finalized and can be changed during fur

 Python example:
 ```python
+param['updater'] = 'grow_quantile_histmaker_oneapi'
 param['predictor'] = 'predictor_oneapi'
 param['objective'] = 'reg:squarederror_oneapi'
 ```
````
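For readers working from C++ rather than Python, the same parameters can also be set through XGBoost's C API. A hedged sketch, assuming a LIBSVM training file named `train.libsvm`, an XGBoost build configured with `PLUGIN_UPDATER_ONEAPI=ON`, and with error checking of return codes omitted for brevity:

```cpp
#include <xgboost/c_api.h>

int main() {
  // Load training data (the file name is an assumption for this sketch).
  DMatrixHandle dtrain;
  XGDMatrixCreateFromFile("train.libsvm", 1, &dtrain);

  BoosterHandle booster;
  XGBoosterCreate(&dtrain, 1, &booster);

  // Same parameters as the Python example in the README diff above.
  XGBoosterSetParam(booster, "updater", "grow_quantile_histmaker_oneapi");
  XGBoosterSetParam(booster, "predictor", "predictor_oneapi");
  XGBoosterSetParam(booster, "objective", "reg:squarederror_oneapi");

  for (int iter = 0; iter < 10; ++iter) {
    XGBoosterUpdateOneIter(booster, iter, dtrain);
  }

  XGBoosterFree(booster);
  XGDMatrixFree(dtrain);
  return 0;
}
```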

plugin/updater_oneapi/data_oneapi.h

Lines changed: 188 additions & 0 deletions
New file:

```cpp
/*!
 * Copyright by Contributors 2017-2023
 */
#ifndef XGBOOST_COMMON_DATA_ONEAPI_H_
#define XGBOOST_COMMON_DATA_ONEAPI_H_

#include <cstddef>
#include <limits>
#include <mutex>

#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/logging.h"
#include "xgboost/host_device_vector.h"

#include "../../src/common/threading_utils.h"

#include "CL/sycl.hpp"

namespace xgboost {

template <typename T>
class USMDeleter {
 public:
  explicit USMDeleter(sycl::queue qu) : qu_(qu) {}

  void operator()(T* data) const {
    sycl::free(data, qu_);
  }

 private:
  sycl::queue qu_;
};

/* OneAPI implementation of a HostDeviceVector, storing both host and device memory in a single USM buffer.
   Synchronization between host and device is managed by the compiler runtime. */
template <typename T>
class USMVector {
  static_assert(std::is_standard_layout<T>::value, "USMVector admits only POD types");

 public:
  USMVector() : size_(0), data_(nullptr) {}

  USMVector(sycl::queue qu) : qu_(qu), size_(0), data_(nullptr) {}

  USMVector(sycl::queue qu, size_t size) : qu_(qu), size_(size) {
    data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
  }

  USMVector(sycl::queue qu, size_t size, T v) : qu_(qu), size_(size) {
    data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
    qu_.fill(data_.get(), v, size_).wait();
  }

  USMVector(sycl::queue qu, const std::vector<T> &vec) : qu_(qu) {
    size_ = vec.size();
    data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
    std::copy(vec.begin(), vec.end(), data_.get());
  }

  USMVector(const USMVector<T>& other) : qu_(other.qu_), size_(other.size_), data_(other.data_) {
  }

  ~USMVector() {
  }

  USMVector<T>& operator=(const USMVector<T>& other) {
    qu_ = other.qu_;
    size_ = other.size_;
    data_ = other.data_;
    return *this;
  }

  T* Data() { return data_.get(); }
  const T* DataConst() const { return data_.get(); }

  size_t Size() const { return size_; }

  T& operator[] (size_t i) { return data_.get()[i]; }
  const T& operator[] (size_t i) const { return data_.get()[i]; }

  T* Begin() const { return data_.get(); }
  T* End() const { return data_.get() + size_; }

  bool Empty() const { return (size_ == 0); }

  void Clear() {
    data_.reset();
    size_ = 0;
  }

  void Resize(sycl::queue qu, size_t size_new) {
    qu_ = qu;
    if (size_new <= size_) {
      size_ = size_new;
    } else {
      size_t size_old = size_;
      auto data_old = data_;
      size_ = size_new;
      data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
      if (size_old > 0) {
        qu_.memcpy(data_.get(), data_old.get(), sizeof(T) * size_old).wait();
      }
    }
  }

  void Resize(sycl::queue qu, size_t size_new, T v) {
    qu_ = qu;
    if (size_new <= size_) {
      size_ = size_new;
    } else {
      size_t size_old = size_;
      auto data_old = data_;
      size_ = size_new;
      data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
      if (size_old > 0) {
        qu_.memcpy(data_.get(), data_old.get(), sizeof(T) * size_old).wait();
      }
      if (size_new > size_old) {
        qu_.fill(data_.get() + size_old, v, size_new - size_old).wait();
      }
    }
  }

  void Init(sycl::queue qu, const std::vector<T> &vec) {
    qu_ = qu;
    size_ = vec.size();
    data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
    std::copy(vec.begin(), vec.end(), data_.get());
  }

  using value_type = T;  // NOLINT

 private:
  sycl::queue qu_;
  size_t size_;
  std::shared_ptr<T> data_;
};

/* Wrapper for DMatrix which stores all batches in a single USM buffer */
struct DeviceMatrixOneAPI {
  DMatrix* p_mat;  // Pointer to the original matrix on the host
  sycl::queue qu_;
  USMVector<size_t> row_ptr;
  USMVector<Entry> data;
  size_t total_offset;

  DeviceMatrixOneAPI(sycl::queue qu, DMatrix* dmat) : p_mat(dmat), qu_(qu) {
    size_t num_row = 0;
    size_t num_nonzero = 0;
    for (auto &batch : dmat->GetBatches<SparsePage>()) {
      const auto& data_vec = batch.data.HostVector();
      const auto& offset_vec = batch.offset.HostVector();
      num_nonzero += data_vec.size();
      num_row += batch.Size();
    }

    row_ptr.Resize(qu_, num_row + 1);
    data.Resize(qu_, num_nonzero);

    size_t data_offset = 0;
    for (auto &batch : dmat->GetBatches<SparsePage>()) {
      const auto& data_vec = batch.data.HostVector();
      const auto& offset_vec = batch.offset.HostVector();
      size_t batch_size = batch.Size();
      if (batch_size > 0) {
        std::copy(offset_vec.data(), offset_vec.data() + batch_size,
                  row_ptr.Data() + batch.base_rowid);
        if (batch.base_rowid > 0) {
          for (size_t i = 0; i < batch_size; i++)
            row_ptr[i + batch.base_rowid] += batch.base_rowid;
        }
        std::copy(data_vec.data(), data_vec.data() + offset_vec[batch_size],
                  data.Data() + data_offset);
        data_offset += offset_vec[batch_size];
      }
    }
    row_ptr[num_row] = data_offset;
    total_offset = data_offset;
  }

  ~DeviceMatrixOneAPI() {
  }
};

}  // namespace xgboost

#endif
```
