From 3a9d2df39d79619d935521f7358892633fd776c4 Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin <> Date: Tue, 6 Aug 2024 02:02:34 -0700 Subject: [PATCH 1/2] minor refactoring; adding tests --- plugin/sycl/tree/hist_updater.cc | 22 +++---- plugin/sycl/tree/hist_updater.h | 4 -- tests/cpp/plugin/test_sycl_hist_updater.cc | 73 ++++++++++++++++++++-- 3 files changed, 77 insertions(+), 22 deletions(-) diff --git a/plugin/sycl/tree/hist_updater.cc b/plugin/sycl/tree/hist_updater.cc index 90d8089e40c4..77da983a8423 100644 --- a/plugin/sycl/tree/hist_updater.cc +++ b/plugin/sycl/tree/hist_updater.cc @@ -112,13 +112,12 @@ void HistUpdater::BuildLocalHistograms( template void HistUpdater::BuildNodeStats( const common::GHistIndexMatrix &gmat, - DMatrix *p_fmat, RegTree *p_tree, const USMVector &gpair) { builder_monitor_.Start("BuildNodeStats"); for (auto const& entry : qexpand_depth_wise_) { int nid = entry.nid; - this->InitNewNode(nid, gmat, gpair, *p_fmat, *p_tree); + this->InitNewNode(nid, gmat, gpair, *p_tree); // add constraints if (!(*p_tree)[nid].IsLeftChild() && !(*p_tree)[nid].IsRoot()) { // it's a right child @@ -232,7 +231,6 @@ void HistUpdater::SplitSiblings( template void HistUpdater::ExpandWithDepthWise( const common::GHistIndexMatrix &gmat, - DMatrix *p_fmat, RegTree *p_tree, const USMVector &gpair) { int num_leaves = 0; @@ -249,7 +247,7 @@ void HistUpdater::ExpandWithDepthWise( hist_rows_adder_->AddHistRows(this, &sync_ids, p_tree); BuildLocalHistograms(gmat, p_tree, gpair); hist_synchronizer_->SyncHistograms(this, sync_ids, p_tree); - BuildNodeStats(gmat, p_fmat, p_tree, gpair); + BuildNodeStats(gmat, p_tree, gpair); EvaluateAndApplySplits(gmat, p_tree, &num_leaves, depth, &temp_qexpand_depth); @@ -270,7 +268,6 @@ void HistUpdater::ExpandWithDepthWise( template void HistUpdater::ExpandWithLossGuide( const common::GHistIndexMatrix& gmat, - DMatrix* p_fmat, RegTree* p_tree, const USMVector &gpair) { builder_monitor_.Start("ExpandWithLossGuide"); @@ -280,10 +277,10 @@ void HistUpdater::ExpandWithLossGuide( ExpandEntry node(ExpandEntry::kRootNid, p_tree->GetDepth(ExpandEntry::kRootNid)); BuildHistogramsLossGuide(node, gmat, p_tree, gpair); - this->InitNewNode(ExpandEntry::kRootNid, gmat, gpair, *p_fmat, *p_tree); - + this->InitNewNode(ExpandEntry::kRootNid, gmat, gpair, *p_tree); this->EvaluateSplits({node}, gmat, *p_tree); node.split.loss_chg = snode_host_[ExpandEntry::kRootNid].best.loss_chg; + // LOG(FATAL) << node.split.loss_chg; qexpand_loss_guided_->push(node); ++num_leaves; @@ -305,9 +302,7 @@ void HistUpdater::ExpandWithLossGuide( e.best.DefaultLeft(), e.weight, left_leaf_weight, right_leaf_weight, e.best.loss_chg, e.stats.GetHess(), e.best.left_sum.GetHess(), e.best.right_sum.GetHess()); - this->ApplySplit({candidate}, gmat, p_tree); - const int cleft = (*p_tree)[nid].LeftChild(); const int cright = (*p_tree)[nid].RightChild(); @@ -320,8 +315,8 @@ void HistUpdater::ExpandWithLossGuide( BuildHistogramsLossGuide(right_node, gmat, p_tree, gpair); } - this->InitNewNode(cleft, gmat, gpair, *p_fmat, *p_tree); - this->InitNewNode(cright, gmat, gpair, *p_fmat, *p_tree); + this->InitNewNode(cleft, gmat, gpair, *p_tree); + this->InitNewNode(cright, gmat, gpair, *p_tree); bst_uint featureid = snode_host_[nid].best.SplitIndex(); tree_evaluator_.AddSplit(nid, cleft, cright, featureid, snode_host_[cleft].weight, snode_host_[cright].weight); @@ -356,9 +351,9 @@ void HistUpdater::Update( this->InitData(gmat, gpair_device, *p_fmat, *p_tree); if (param_.grow_policy == xgboost::tree::TrainParam::kLossGuide) { - ExpandWithLossGuide(gmat, p_fmat, p_tree, gpair_device); + ExpandWithLossGuide(gmat, p_tree, gpair_device); } else { - ExpandWithDepthWise(gmat, p_fmat, p_tree, gpair_device); + ExpandWithDepthWise(gmat, p_tree, gpair_device); } for (int nid = 0; nid < p_tree->NumNodes(); ++nid) { @@ -842,7 +837,6 @@ void HistUpdater::InitNewNode(int nid, const common::GHistIndexMatrix& gmat, const USMVector &gpair, - const DMatrix& fmat, const RegTree& tree) { builder_monitor_.Start("InitNewNode"); diff --git a/plugin/sycl/tree/hist_updater.h b/plugin/sycl/tree/hist_updater.h index 67f4b0090711..a69f2eea939d 100644 --- a/plugin/sycl/tree/hist_updater.h +++ b/plugin/sycl/tree/hist_updater.h @@ -142,11 +142,9 @@ class HistUpdater { void InitNewNode(int nid, const common::GHistIndexMatrix& gmat, const USMVector &gpair, - const DMatrix& fmat, const RegTree& tree); void ExpandWithDepthWise(const common::GHistIndexMatrix &gmat, - DMatrix *p_fmat, RegTree *p_tree, const USMVector &gpair); @@ -169,7 +167,6 @@ class HistUpdater { RegTree *p_tree); void BuildNodeStats(const common::GHistIndexMatrix &gmat, - DMatrix *p_fmat, RegTree *p_tree, const USMVector &gpair); @@ -188,7 +185,6 @@ class HistUpdater { std::vector* temp_qexpand_depth); void ExpandWithLossGuide(const common::GHistIndexMatrix& gmat, - DMatrix* p_fmat, RegTree* p_tree, const USMVector& gpair); diff --git a/tests/cpp/plugin/test_sycl_hist_updater.cc b/tests/cpp/plugin/test_sycl_hist_updater.cc index 64cabd4052cf..9fc979bd492e 100644 --- a/tests/cpp/plugin/test_sycl_hist_updater.cc +++ b/tests/cpp/plugin/test_sycl_hist_updater.cc @@ -49,9 +49,8 @@ class TestHistUpdater : public HistUpdater { auto TestInitNewNode(int nid, const common::GHistIndexMatrix& gmat, const USMVector &gpair, - const DMatrix& fmat, const RegTree& tree) { - HistUpdater::InitNewNode(nid, gmat, gpair, fmat, tree); + HistUpdater::InitNewNode(nid, gmat, gpair, tree); return HistUpdater::snode_host_[nid]; } @@ -67,6 +66,16 @@ class TestHistUpdater : public HistUpdater { RegTree* p_tree) { HistUpdater::ApplySplit(nodes, gmat, p_tree); } + + auto TestExpandWithLossGuide(const common::GHistIndexMatrix& gmat, + DMatrix *p_fmat, + RegTree* p_tree, + const USMVector &gpair) { + // HistUpdater::tree_evaluator_.Reset(HistUpdater::qu_, + // HistUpdater::param_, + // p_fmat->Info().num_col_); + HistUpdater::ExpandWithLossGuide(gmat, p_tree, gpair); + } }; void GenerateRandomGPairs(::sycl::queue* qu, GradientPair* gpair_ptr, size_t num_rows, bool has_neg_hess) { @@ -295,7 +304,7 @@ void TestHistUpdaterInitNewNode(const xgboost::tree::TrainParam& param, float sp auto& row_idxs = row_set_collection->Data(); const size_t* row_idxs_ptr = row_idxs.DataConst(); updater.TestBuildHistogramsLossGuide(node, gmat, &tree, gpair); - const auto snode = updater.TestInitNewNode(ExpandEntry::kRootNid, gmat, gpair, *p_fmat, tree); + const auto snode = updater.TestInitNewNode(ExpandEntry::kRootNid, gmat, gpair, tree); GradStats grad_stat; { @@ -354,7 +363,7 @@ void TestHistUpdaterEvaluateSplits(const xgboost::tree::TrainParam& param) { auto& row_idxs = row_set_collection->Data(); const size_t* row_idxs_ptr = row_idxs.DataConst(); const auto* hist = updater.TestBuildHistogramsLossGuide(node, gmat, &tree, gpair); - const auto snode_init = updater.TestInitNewNode(ExpandEntry::kRootNid, gmat, gpair, *p_fmat, tree); + const auto snode_init = updater.TestInitNewNode(ExpandEntry::kRootNid, gmat, gpair, tree); const auto snode_updated = updater.TestEvaluateSplits({node}, gmat, tree); auto best_loss_chg = snode_updated[0].best.loss_chg; @@ -479,6 +488,53 @@ void TestHistUpdaterApplySplit(const xgboost::tree::TrainParam& param, float spa } +template +void TestHistUpdaterExpandWithLossGuide(const xgboost::tree::TrainParam& param) { + const size_t num_rows = 3; + const size_t num_columns = 1; + const size_t n_bins = 16; + + Context ctx; + ctx.UpdateAllowUnknown(Args{{"device", "sycl"}}); + + DeviceManager device_manager; + auto qu = device_manager.GetQueue(ctx.Device()); + + std::vector data = {7, 3, 15}; + auto p_fmat = GetDMatrixFromData(data, num_rows, num_columns); + + DeviceMatrix dmat; + dmat.Init(qu, p_fmat.get()); + common::GHistIndexMatrix gmat; + gmat.Init(qu, &ctx, dmat, n_bins); + + std::vector gpair_host = {{1, 2}, {3, 1}, {1, 1}}; + USMVector gpair(&qu, gpair_host); + + RegTree tree; + FeatureInteractionConstraintHost int_constraints; + TestHistUpdater updater(&ctx, qu, param, int_constraints, p_fmat.get()); + updater.SetHistSynchronizer(new BatchHistSynchronizer()); + updater.SetHistRowsAdder(new BatchHistRowsAdder()); + auto* row_set_collection = updater.TestInitData(gmat, gpair, *p_fmat, tree); + + updater.TestExpandWithLossGuide(gmat, p_fmat.get(), &tree, gpair); + + const auto& nodes = tree.GetNodes(); + std::vector ans(data.size()); + for (size_t data_idx = 0; data_idx < data.size(); ++data_idx) { + size_t node_idx = 0; + while (!nodes[node_idx].IsLeaf()) { + node_idx = data[data_idx] < nodes[node_idx].SplitCond() ? nodes[node_idx].LeftChild() : nodes[node_idx].RightChild(); + } + ans[data_idx] = nodes[node_idx].LeafValue(); + } + + ASSERT_NEAR(ans[0], -0.15, 1e-6); + ASSERT_NEAR(ans[1], -0.45, 1e-6); + ASSERT_NEAR(ans[2], -0.15, 1e-6); +} + TEST(SyclHistUpdater, Sampling) { xgboost::tree::TrainParam param; param.UpdateAllowUnknown(Args{{"subsample", "0.7"}}); @@ -546,4 +602,13 @@ TEST(SyclHistUpdater, ApplySplitDence) { TestHistUpdaterApplySplit(param, 0.0, (1u << 16) + 1); } +TEST(SyclHistUpdater, ExpandWithLossGuide) { + xgboost::tree::TrainParam param; + param.UpdateAllowUnknown(Args{{"max_depth", "2"}, + {"grow_policy", "lossguide"}}); + + TestHistUpdaterExpandWithLossGuide(param); + TestHistUpdaterExpandWithLossGuide(param); +} + } // namespace xgboost::sycl::tree From a046944f1f4420818ae80968c0d07f232877ec4f Mon Sep 17 00:00:00 2001 From: Dmitry Razdoburdin <> Date: Tue, 6 Aug 2024 02:04:53 -0700 Subject: [PATCH 2/2] remove dead comments --- tests/cpp/plugin/test_sycl_hist_updater.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/cpp/plugin/test_sycl_hist_updater.cc b/tests/cpp/plugin/test_sycl_hist_updater.cc index 9fc979bd492e..e9f3ecfa2132 100644 --- a/tests/cpp/plugin/test_sycl_hist_updater.cc +++ b/tests/cpp/plugin/test_sycl_hist_updater.cc @@ -71,9 +71,6 @@ class TestHistUpdater : public HistUpdater { DMatrix *p_fmat, RegTree* p_tree, const USMVector &gpair) { - // HistUpdater::tree_evaluator_.Reset(HistUpdater::qu_, - // HistUpdater::param_, - // p_fmat->Info().num_col_); HistUpdater::ExpandWithLossGuide(gmat, p_tree, gpair); } };