
Commit fed039c

Merge pull request #1 from razdoburdin/release_1.7.3_oneapi
Oneapi plugin for xgboost 1.7.3
2 parents 76bdca0 + ffb7996, commit fed039c

23 files changed: +4469 −268 lines

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
```diff
@@ -158,6 +158,10 @@ if (USE_CUDA)
   endif ()
 endif (USE_CUDA)

+if (PLUGIN_UPDATER_ONEAPI)
+  target_compile_definitions(xgboost PRIVATE -DXGBOOST_USE_ONEAPI=1)
+endif (PLUGIN_UPDATER_ONEAPI)
+
 if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
     ((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
      (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))
```
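For context, a compile definition like `XGBOOST_USE_ONEAPI` is normally consumed through a preprocessor guard. A minimal, hypothetical sketch (not part of this commit; the helper name is illustrative only) of how oneAPI-only code can be fenced off behind it:

```cpp
// Hypothetical illustration: OneAPIAvailable() is not an XGBoost function; it
// only shows how code can branch on the definition added above.
#if defined(XGBOOST_USE_ONEAPI)
#include <CL/sycl.hpp>

inline bool OneAPIAvailable() {
  // True if the SYCL runtime reports at least one usable device.
  return !sycl::device::get_devices().empty();
}
#else
inline bool OneAPIAvailable() { return false; }
#endif
```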

include/xgboost/generic_parameters.h

Lines changed: 9 additions & 0 deletions
```diff
@@ -21,6 +21,7 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
   // Constant representing the device ID of CPU.
   static int32_t constexpr kCpuId = -1;
   static int64_t constexpr kDefaultSeed = 0;
+  static int32_t constexpr kDefaultId = -1;

  public:
   GenericParameter();
@@ -38,6 +39,9 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
   bool fail_on_invalid_gpu_id {false};
   bool validate_parameters {false};

+  // primary oneAPI device, -1 means default device
+  int device_id;
+
   /*!
    * \brief Configure the parameter `gpu_id'.
    *
@@ -73,6 +77,11 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
     DMLC_DECLARE_FIELD(validate_parameters)
         .set_default(false)
         .describe("Enable checking whether parameters are used or not.");
+
+    DMLC_DECLARE_FIELD(device_id)
+        .set_default(kDefaultId)
+        .set_lower_bound(-1)
+        .describe("The primary oneAPI device ordinal.");
   }
 };
```
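The new `device_id` field defaults to `kDefaultId` (-1), meaning "let the runtime pick a device". As a rough illustration of how such an ordinal could be resolved into a SYCL queue, here is a minimal sketch; the helper name `QueueFromDeviceId` and the device enumeration order are assumptions for this example, not the plugin's actual logic:

```cpp
#include <vector>
#include <CL/sycl.hpp>

// Hypothetical helper, not code from this commit: map an XGBoost-style
// device_id onto a sycl::queue. -1 selects the runtime's default device;
// any other value indexes the list of devices visible to the runtime.
inline sycl::queue QueueFromDeviceId(int device_id) {
  if (device_id < 0) {
    return sycl::queue{};  // default-constructed queue uses the default device
  }
  std::vector<sycl::device> devices = sycl::device::get_devices();
  return sycl::queue{devices.at(static_cast<size_t>(device_id))};
}
```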

plugin/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
```diff
@@ -4,7 +4,10 @@ endif (PLUGIN_DENSE_PARSER)

 if (PLUGIN_UPDATER_ONEAPI)
   add_library(oneapi_plugin OBJECT
+    ${xgboost_SOURCE_DIR}/plugin/updater_oneapi/hist_util_oneapi.cc
     ${xgboost_SOURCE_DIR}/plugin/updater_oneapi/regression_obj_oneapi.cc
+    ${xgboost_SOURCE_DIR}/plugin/updater_oneapi/multiclass_obj_oneapi.cc
+    ${xgboost_SOURCE_DIR}/plugin/updater_oneapi/updater_quantile_hist_oneapi.cc
     ${xgboost_SOURCE_DIR}/plugin/updater_oneapi/predictor_oneapi.cc)
   target_include_directories(oneapi_plugin
     PRIVATE
```

plugin/updater_oneapi/README.md

Lines changed: 7 additions & 0 deletions
````diff
@@ -2,6 +2,12 @@
 This plugin adds support of OneAPI programming model for tree construction and prediction algorithms to XGBoost.

 ## Usage
+Specify the 'updater' parameter as one of the following options to offload model training on OneAPI device.
+
+### Algorithms
+| updater | Description |
+| --- | --- |
+| grow_quantile_histmaker_oneapi | model training using OneAPI device |
 Specify the 'objective' parameter as one of the following options to offload computation of objective function on OneAPI device.

 ### Algorithms
@@ -24,6 +30,7 @@ Please note that parameter names are not finalized and can be changed during fur

 Python example:
 ```python
+param['updater'] = 'grow_quantile_histmaker_oneapi'
 param['predictor'] = 'predictor_oneapi'
 param['objective'] = 'reg:squarederror_oneapi'
 ```
````
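For readers working from C++ rather than Python, the same parameters can also be set through XGBoost's C API. A hedged sketch, assuming a LIBSVM training file named `train.libsvm`, an XGBoost build configured with `PLUGIN_UPDATER_ONEAPI=ON`, and with error checking of return codes omitted for brevity:

```cpp
#include <xgboost/c_api.h>

int main() {
  // Load training data (the file name is an assumption for this sketch).
  DMatrixHandle dtrain;
  XGDMatrixCreateFromFile("train.libsvm", 1, &dtrain);

  BoosterHandle booster;
  XGBoosterCreate(&dtrain, 1, &booster);

  // Same parameters as the Python example in the README diff above.
  XGBoosterSetParam(booster, "updater", "grow_quantile_histmaker_oneapi");
  XGBoosterSetParam(booster, "predictor", "predictor_oneapi");
  XGBoosterSetParam(booster, "objective", "reg:squarederror_oneapi");

  for (int iter = 0; iter < 10; ++iter) {
    XGBoosterUpdateOneIter(booster, iter, dtrain);
  }

  XGBoosterFree(booster);
  XGDMatrixFree(dtrain);
  return 0;
}
```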

plugin/updater_oneapi/data_oneapi.h

Lines changed: 188 additions & 0 deletions
New file:

```cpp
/*!
 * Copyright by Contributors 2017-2023
 */
#ifndef XGBOOST_COMMON_DATA_ONEAPI_H_
#define XGBOOST_COMMON_DATA_ONEAPI_H_

#include <cstddef>
#include <limits>
#include <mutex>

#include "xgboost/base.h"
#include "xgboost/data.h"
#include "xgboost/logging.h"
#include "xgboost/host_device_vector.h"

#include "../../src/common/threading_utils.h"

#include "CL/sycl.hpp"

namespace xgboost {

template <typename T>
class USMDeleter {
 public:
  explicit USMDeleter(sycl::queue qu) : qu_(qu) {}

  void operator()(T* data) const {
    sycl::free(data, qu_);
  }

 private:
  sycl::queue qu_;
};

/* OneAPI implementation of a HostDeviceVector, storing both host and device memory in a single USM buffer.
   Synchronization between host and device is managed by the compiler runtime. */
template <typename T>
class USMVector {
  static_assert(std::is_standard_layout<T>::value, "USMVector admits only POD types");

 public:
  USMVector() : size_(0), data_(nullptr) {}

  USMVector(sycl::queue qu) : qu_(qu), size_(0), data_(nullptr) {}

  USMVector(sycl::queue qu, size_t size) : qu_(qu), size_(size) {
    data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
  }

  USMVector(sycl::queue qu, size_t size, T v) : qu_(qu), size_(size) {
    data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
    qu_.fill(data_.get(), v, size_).wait();
  }

  USMVector(sycl::queue qu, const std::vector<T> &vec) : qu_(qu) {
    size_ = vec.size();
    data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
    std::copy(vec.begin(), vec.end(), data_.get());
  }

  USMVector(const USMVector<T>& other) : qu_(other.qu_), size_(other.size_), data_(other.data_) {
  }

  ~USMVector() {
  }

  USMVector<T>& operator=(const USMVector<T>& other) {
    qu_ = other.qu_;
    size_ = other.size_;
    data_ = other.data_;
    return *this;
  }

  T* Data() { return data_.get(); }
  const T* DataConst() const { return data_.get(); }

  size_t Size() const { return size_; }

  T& operator[] (size_t i) { return data_.get()[i]; }
  const T& operator[] (size_t i) const { return data_.get()[i]; }

  T* Begin() const { return data_.get(); }
  T* End() const { return data_.get() + size_; }

  bool Empty() const { return (size_ == 0); }

  void Clear() {
    data_.reset();
    size_ = 0;
  }

  void Resize(sycl::queue qu, size_t size_new) {
    qu_ = qu;
    if (size_new <= size_) {
      size_ = size_new;
    } else {
      size_t size_old = size_;
      auto data_old = data_;
      size_ = size_new;
      data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
      if (size_old > 0) {
        qu_.memcpy(data_.get(), data_old.get(), sizeof(T) * size_old).wait();
      }
    }
  }

  void Resize(sycl::queue qu, size_t size_new, T v) {
    qu_ = qu;
    if (size_new <= size_) {
      size_ = size_new;
    } else {
      size_t size_old = size_;
      auto data_old = data_;
      size_ = size_new;
      data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
      if (size_old > 0) {
        qu_.memcpy(data_.get(), data_old.get(), sizeof(T) * size_old).wait();
      }
      if (size_new > size_old) {
        qu_.fill(data_.get() + size_old, v, size_new - size_old).wait();
      }
    }
  }

  void Init(sycl::queue qu, const std::vector<T> &vec) {
    qu_ = qu;
    size_ = vec.size();
    data_ = std::shared_ptr<T>(sycl::malloc_shared<T>(size_, qu_), USMDeleter<T>(qu_));
    std::copy(vec.begin(), vec.end(), data_.get());
  }

  using value_type = T;  // NOLINT

 private:
  sycl::queue qu_;
  size_t size_;
  std::shared_ptr<T> data_;
};

/* Wrapper for DMatrix which stores all batches in a single USM buffer */
struct DeviceMatrixOneAPI {
  DMatrix* p_mat;  // Pointer to the original matrix on the host
  sycl::queue qu_;
  USMVector<size_t> row_ptr;
  USMVector<Entry> data;
  size_t total_offset;

  DeviceMatrixOneAPI(sycl::queue qu, DMatrix* dmat) : p_mat(dmat), qu_(qu) {
    size_t num_row = 0;
    size_t num_nonzero = 0;
    for (auto &batch : dmat->GetBatches<SparsePage>()) {
      const auto& data_vec = batch.data.HostVector();
      const auto& offset_vec = batch.offset.HostVector();
      num_nonzero += data_vec.size();
      num_row += batch.Size();
    }

    row_ptr.Resize(qu_, num_row + 1);
    data.Resize(qu_, num_nonzero);

    size_t data_offset = 0;
    for (auto &batch : dmat->GetBatches<SparsePage>()) {
      const auto& data_vec = batch.data.HostVector();
      const auto& offset_vec = batch.offset.HostVector();
      size_t batch_size = batch.Size();
      if (batch_size > 0) {
        std::copy(offset_vec.data(), offset_vec.data() + batch_size,
                  row_ptr.Data() + batch.base_rowid);
        if (batch.base_rowid > 0) {
          for (size_t i = 0; i < batch_size; i++)
            row_ptr[i + batch.base_rowid] += batch.base_rowid;
        }
        std::copy(data_vec.data(), data_vec.data() + offset_vec[batch_size],
                  data.Data() + data_offset);
        data_offset += offset_vec[batch_size];
      }
    }
    row_ptr[num_row] = data_offset;
    total_offset = data_offset;
  }

  ~DeviceMatrixOneAPI() {
  }
};

}  // namespace xgboost

#endif
```
