1
+ /* !
2
+ * Copyright by Contributors 2017-2023
3
+ */
4
+ #ifndef XGBOOST_COMMON_DATA_ONEAPI_H_
5
+ #define XGBOOST_COMMON_DATA_ONEAPI_H_
6
+
7
+ #include < cstddef>
8
+ #include < limits>
9
+ #include < mutex>
10
+
11
+ #include " xgboost/base.h"
12
+ #include " xgboost/data.h"
13
+ #include " xgboost/logging.h"
14
+ #include " xgboost/host_device_vector.h"
15
+
16
+ #include " ../../src/common/threading_utils.h"
17
+
18
+ #include " CL/sycl.hpp"
19
+
20
+ namespace xgboost {
21
+
22
+ template <typename T>
23
+ class USMDeleter {
24
+ public:
25
+ explicit USMDeleter (sycl::queue qu) : qu_(qu) {}
26
+
27
+ void operator ()(T* data) const {
28
+ sycl::free (data, qu_);
29
+ }
30
+
31
+ private:
32
+ sycl::queue qu_;
33
+ };
34
+
35
+ /* OneAPI implementation of a HostDeviceVector, storing both host and device memory in a single USM buffer.
36
+ Synchronization between host and device is managed by the compiler runtime. */
37
+ template <typename T>
38
+ class USMVector {
39
+ static_assert (std::is_standard_layout<T>::value, " USMVector admits only POD types" );
40
+
41
+ public:
42
+ USMVector () : size_(0 ), data_(nullptr ) {}
43
+
44
+ USMVector (sycl::queue qu) : qu_(qu), size_(0 ), data_(nullptr ) {}
45
+
46
+ USMVector (sycl::queue qu, size_t size) : qu_(qu), size_(size) {
47
+ data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
48
+ }
49
+
50
+ USMVector (sycl::queue qu, size_t size, T v) : qu_(qu), size_(size) {
51
+ data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
52
+ qu_.fill (data_.get (), v, size_).wait ();
53
+ }
54
+
55
+ USMVector (sycl::queue qu, const std::vector<T> &vec) : qu_(qu) {
56
+ size_ = vec.size ();
57
+ data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
58
+ std::copy (vec.begin (), vec.end (), data_.get ());
59
+ }
60
+
61
+ USMVector (const USMVector<T>& other) : qu_(other.qu_), size_(other.size_), data_(other.data_) {
62
+ }
63
+
64
+ ~USMVector () {
65
+ }
66
+
67
+ USMVector<T>& operator =(const USMVector<T>& other) {
68
+ qu_ = other.qu_ ;
69
+ size_ = other.size_ ;
70
+ data_ = other.data_ ;
71
+ return *this ;
72
+ }
73
+
74
+ T* Data () { return data_.get (); }
75
+ const T* DataConst () const { return data_.get (); }
76
+
77
+ size_t Size () const { return size_; }
78
+
79
+ T& operator [] (size_t i) { return data_.get ()[i]; }
80
+ const T& operator [] (size_t i) const { return data_.get ()[i]; }
81
+
82
+ T* Begin () const { return data_.get (); }
83
+ T* End () const { return data_.get () + size_; }
84
+
85
+ bool Empty () const { return (size_ == 0 ); }
86
+
87
+ void Clear () {
88
+ data_.reset ();
89
+ size_ = 0 ;
90
+ }
91
+
92
+ void Resize (sycl::queue qu, size_t size_new) {
93
+ qu_ = qu;
94
+ if (size_new <= size_) {
95
+ size_ = size_new;
96
+ } else {
97
+ size_t size_old = size_;
98
+ auto data_old = data_;
99
+ size_ = size_new;
100
+ data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
101
+ if (size_old > 0 ) {
102
+ qu_.memcpy (data_.get (), data_old.get (), sizeof (T) * size_old).wait ();
103
+ }
104
+ }
105
+ }
106
+
107
+ void Resize (sycl::queue qu, size_t size_new, T v) {
108
+ qu_ = qu;
109
+ if (size_new <= size_) {
110
+ size_ = size_new;
111
+ } else {
112
+ size_t size_old = size_;
113
+ auto data_old = data_;
114
+ size_ = size_new;
115
+ data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
116
+ if (size_old > 0 ) {
117
+ qu_.memcpy (data_.get (), data_old.get (), sizeof (T) * size_old).wait ();
118
+ }
119
+ if (size_new > size_old) {
120
+ qu_.fill (data_.get () + size_old, v, size_new - size_old).wait ();
121
+ }
122
+ }
123
+ }
124
+
125
+ void Init (sycl::queue qu, const std::vector<T> &vec) {
126
+ qu_ = qu;
127
+ size_ = vec.size ();
128
+ data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
129
+ std::copy (vec.begin (), vec.end (), data_.get ());
130
+ }
131
+
132
+ using value_type = T; // NOLINT
133
+
134
+ private:
135
+ sycl::queue qu_;
136
+ size_t size_;
137
+ std::shared_ptr<T> data_;
138
+ };
139
+
140
+ /* Wrapper for DMatrix which stores all batches in a single USM buffer */
141
+ struct DeviceMatrixOneAPI {
142
+ DMatrix* p_mat; // Pointer to the original matrix on the host
143
+ sycl::queue qu_;
144
+ USMVector<size_t > row_ptr;
145
+ USMVector<Entry> data;
146
+ size_t total_offset;
147
+
148
+ DeviceMatrixOneAPI (sycl::queue qu, DMatrix* dmat) : p_mat(dmat), qu_(qu) {
149
+ size_t num_row = 0 ;
150
+ size_t num_nonzero = 0 ;
151
+ for (auto &batch : dmat->GetBatches <SparsePage>()) {
152
+ const auto & data_vec = batch.data .HostVector ();
153
+ const auto & offset_vec = batch.offset .HostVector ();
154
+ num_nonzero += data_vec.size ();
155
+ num_row += batch.Size ();
156
+ }
157
+
158
+ row_ptr.Resize (qu_, num_row + 1 );
159
+ data.Resize (qu_, num_nonzero);
160
+
161
+ size_t data_offset = 0 ;
162
+ for (auto &batch : dmat->GetBatches <SparsePage>()) {
163
+ const auto & data_vec = batch.data .HostVector ();
164
+ const auto & offset_vec = batch.offset .HostVector ();
165
+ size_t batch_size = batch.Size ();
166
+ if (batch_size > 0 ) {
167
+ std::copy (offset_vec.data (), offset_vec.data () + batch_size,
168
+ row_ptr.Data () + batch.base_rowid );
169
+ if (batch.base_rowid > 0 ) {
170
+ for (size_t i = 0 ; i < batch_size; i++)
171
+ row_ptr[i + batch.base_rowid ] += batch.base_rowid ;
172
+ }
173
+ std::copy (data_vec.data (), data_vec.data () + offset_vec[batch_size],
174
+ data.Data () + data_offset);
175
+ data_offset += offset_vec[batch_size];
176
+ }
177
+ }
178
+ row_ptr[num_row] = data_offset;
179
+ total_offset = data_offset;
180
+ }
181
+
182
+ ~DeviceMatrixOneAPI () {
183
+ }
184
+ };
185
+
186
+ } // namespace xgboost
187
+
188
+ #endif
0 commit comments