Skip to content

Commit 10f5f6d

Browse files
authored
[backport] Fix page concatenation for validation dataset. (dmlc#11338) (dmlc#11435)
1 parent ab1e531 commit 10f5f6d

File tree

3 files changed

+93
-16
lines changed

3 files changed

+93
-16
lines changed

src/data/ellpack_page_source.cu

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -104,23 +104,29 @@ class EllpackHostCacheStreamImpl {
104104

105105
this->cache_->sizes_orig.push_back(page.Impl()->MemCostBytes());
106106
auto orig_ptr = this->cache_->sizes_orig.size() - 1;
107+
CHECK_EQ(this->cache_->pages.size(), this->cache_->on_device.size());
107108

108109
CHECK_LT(orig_ptr, this->cache_->NumBatchesOrig());
109110
auto cache_idx = this->cache_->cache_mapping.at(orig_ptr);
110111
// Wrap up the previous page if this is a new page, or this is the last page.
111112
auto new_page = cache_idx == this->cache_->pages.size();
112-
113+
// Last page expected from the user.
113114
auto last_page = (orig_ptr + 1) == this->cache_->NumBatchesOrig();
114-
// No page concatenation is performed. If there's page concatenation, then the number
115-
// of pages in the cache must be smaller than the input number of pages.
116-
bool no_concat = this->cache_->NumBatchesOrig() == this->cache_->buffer_rows.size();
115+
116+
bool const no_concat = this->cache_->NoConcat();
117+
117118
// Whether the page should be cached in device. If true, then we don't need to make a
118119
// copy during write since the temporary page is already in device when page
119120
// concatenation is enabled.
120-
bool to_device = this->cache_->prefer_device &&
121-
this->cache_->NumDevicePages() < this->cache_->max_num_device_pages;
122-
123-
auto commit_page = [&ctx](EllpackPageImpl const* old_impl) {
121+
//
122+
// This applies only to a new cached page. If we are concatenating this page to an
123+
// existing cached page, then we should respect the existing flag obtained from the
124+
// first page of the cached page.
125+
bool to_device_if_new_page =
126+
this->cache_->prefer_device &&
127+
this->cache_->NumDevicePages() < this->cache_->max_num_device_pages;
128+
129+
auto commit_host_page = [](EllpackPageImpl const* old_impl) {
124130
CHECK_EQ(old_impl->gidx_buffer.Resource()->Type(), common::ResourceHandler::kCudaMalloc);
125131
auto new_impl = std::make_unique<EllpackPageImpl>();
126132
new_impl->CopyInfo(old_impl);
@@ -137,7 +143,7 @@ class EllpackHostCacheStreamImpl {
137143
auto new_impl = std::make_unique<EllpackPageImpl>();
138144
new_impl->CopyInfo(page.Impl());
139145

140-
if (to_device) {
146+
if (to_device_if_new_page) {
141147
// Copy to device
142148
new_impl->gidx_buffer = common::MakeFixedVecWithCudaMalloc<common::CompressedByteT>(
143149
page.Impl()->gidx_buffer.size());
@@ -151,15 +157,16 @@ class EllpackHostCacheStreamImpl {
151157

152158
this->cache_->offsets.push_back(new_impl->n_rows * new_impl->info.row_stride);
153159
this->cache_->pages.push_back(std::move(new_impl));
160+
this->cache_->on_device.push_back(to_device_if_new_page);
154161
return new_page;
155162
}
156163

157164
if (new_page) {
158165
// No need to copy if it's already in device.
159-
if (!this->cache_->pages.empty() && !to_device) {
166+
if (!this->cache_->pages.empty() && !this->cache_->on_device.back()) {
160167
// Need to wrap up the previous page.
161-
auto commited = commit_page(this->cache_->pages.back().get());
162-
// Replace the previous page with a new page.
168+
auto commited = commit_host_page(this->cache_->pages.back().get());
169+
// Replace the previous page (on device) with a new page on host.
163170
this->cache_->pages.back() = std::move(commited);
164171
}
165172
// Push a new page
@@ -174,16 +181,18 @@ class EllpackHostCacheStreamImpl {
174181
auto offset = new_impl->Copy(&ctx, impl, 0);
175182

176183
this->cache_->offsets.push_back(offset);
184+
177185
this->cache_->pages.push_back(std::move(new_impl));
186+
this->cache_->on_device.push_back(to_device_if_new_page);
178187
} else {
179188
CHECK(!this->cache_->pages.empty());
180189
CHECK_EQ(cache_idx, this->cache_->pages.size() - 1);
181190
auto& new_impl = this->cache_->pages.back();
182191
auto offset = new_impl->Copy(&ctx, impl, this->cache_->offsets.back());
183192
this->cache_->offsets.back() += offset;
184193
// No need to copy if it's already in device.
185-
if (last_page && !to_device) {
186-
auto commited = commit_page(this->cache_->pages.back().get());
194+
if (last_page && !this->cache_->on_device.back()) {
195+
auto commited = commit_host_page(this->cache_->pages.back().get());
187196
this->cache_->pages.back() = std::move(commited);
188197
}
189198
}

src/data/ellpack_page_source.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* Copyright 2019-2024, XGBoost Contributors
2+
* Copyright 2019-2025, XGBoost Contributors
33
*/
44

55
#ifndef XGBOOST_DATA_ELLPACK_PAGE_SOURCE_H_
@@ -47,6 +47,7 @@ struct EllpackCacheInfo {
4747
// This is a memory-based cache. It can be a mix of device memory and host memory.
4848
struct EllpackMemCache {
4949
std::vector<std::unique_ptr<EllpackPageImpl>> pages;
50+
std::vector<bool> on_device;
5051
std::vector<std::size_t> offsets;
5152
// Size of each batch before concatenation.
5253
std::vector<bst_idx_t> sizes_orig;
@@ -65,6 +66,9 @@ struct EllpackMemCache {
6566
[[nodiscard]] std::size_t SizeBytes() const;
6667

6768
[[nodiscard]] bool Empty() const { return this->SizeBytes() == 0; }
69+
// No page concatenation is performed. If there's page concatenation, then the number of
70+
// pages in the cache must be smaller than the input number of pages.
71+
[[nodiscard]] bool NoConcat() const { return this->NumBatchesOrig() == this->buffer_rows.size(); }
6872

6973
[[nodiscard]] bst_idx_t NumBatchesOrig() const { return cache_mapping.size(); }
7074
[[nodiscard]] EllpackPageImpl const* At(std::int32_t k) const;
@@ -187,6 +191,7 @@ class EllpackCacheStreamPolicy : public F<S> {
187191

188192
[[nodiscard]] std::unique_ptr<ReaderT> CreateReader(StringView name, bst_idx_t offset,
189193
bst_idx_t length) const;
194+
std::shared_ptr<EllpackMemCache const> Share() const { return p_cache_; }
190195
};
191196

192197
template <typename S, template <typename> typename F>

tests/cpp/data/test_ellpack_page_raw_format.cu

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
/**
2-
* Copyright 2021-2024, XGBoost contributors
2+
* Copyright 2021-2025, XGBoost contributors
33
*/
44
#include <gtest/gtest.h>
55
#include <xgboost/data.h>
66

7+
#include <numeric> // for partial_sum
8+
79
#include "../../../src/data/ellpack_page.cuh" // for EllpackPage, GetRowStride
810
#include "../../../src/data/ellpack_page_raw_format.h" // for EllpackPageRawFormat
911
#include "../../../src/data/ellpack_page_source.h" // for EllpackFormatStreamPolicy
@@ -157,4 +159,65 @@ TEST_P(TestEllpackPageRawFormat, HostIO) {
157159
}
158160

159161
INSTANTIATE_TEST_SUITE_P(EllpackPageRawFormat, TestEllpackPageRawFormat, ::testing::Bool());
162+
163+
TEST(EllpackPageRawFormat, DevicePageConcat) {
164+
auto ctx = MakeCUDACtx(0);
165+
auto param = BatchParam{256, tree::TrainParam::DftSparseThreshold()};
166+
bst_idx_t n_features = 16, n_samples = 128;
167+
168+
auto test = [&](std::int32_t max_num_device_pages, std::int64_t min_cache_page_bytes) {
169+
EllpackCacheInfo cinfo{param, true, max_num_device_pages,
170+
std::numeric_limits<float>::quiet_NaN()};
171+
ExternalDataInfo ext_info;
172+
173+
ext_info.n_batches = 8;
174+
ext_info.row_stride = n_features;
175+
for (bst_idx_t i = 0; i < ext_info.n_batches; ++i) {
176+
ext_info.base_rowids.push_back(n_samples);
177+
}
178+
std::partial_sum(ext_info.base_rowids.cbegin(), ext_info.base_rowids.cend(),
179+
ext_info.base_rowids.begin());
180+
ext_info.accumulated_rows = n_samples * ext_info.n_batches;
181+
ext_info.nnz = ext_info.accumulated_rows * n_features;
182+
183+
auto p_fmat = RandomDataGenerator{n_samples, n_features, 0}.Seed(0).GenerateDMatrix();
184+
EllpackCacheStreamPolicy<EllpackPage, EllpackFormatPolicy> policy;
185+
186+
for (auto const &page : p_fmat->GetBatches<EllpackPage>(&ctx, param)) {
187+
auto cuts = page.Impl()->CutsShared();
188+
CalcCacheMapping(&ctx, true, cuts, min_cache_page_bytes, ext_info, &cinfo);
189+
[&] {
190+
ASSERT_EQ(cinfo.buffer_rows.size(), 4ul);
191+
}();
192+
policy.SetCuts(page.Impl()->CutsShared(), ctx.Device(), std::move(cinfo));
193+
}
194+
195+
auto format = policy.CreatePageFormat(param);
196+
197+
// write multiple pages
198+
for (bst_idx_t i = 0; i < ext_info.n_batches; ++i) {
199+
for (auto const &page : p_fmat->GetBatches<EllpackPage>(&ctx, param)) {
200+
auto writer = policy.CreateWriter({}, i);
201+
[[maybe_unused]] auto n_bytes = format->Write(page, writer.get());
202+
}
203+
}
204+
// check correct concatenation.
205+
auto mem_cache = policy.Share();
206+
return mem_cache;
207+
};
208+
209+
{
210+
auto mem_cache = test(1, n_features * n_samples);
211+
ASSERT_EQ(mem_cache->on_device.size(), 4);
212+
ASSERT_TRUE(mem_cache->on_device[0]);
213+
ASSERT_EQ(mem_cache->NumDevicePages(), 1);
214+
}
215+
{
216+
auto mem_cache = test(2, n_features * n_samples);
217+
ASSERT_EQ(mem_cache->on_device.size(), 4);
218+
ASSERT_TRUE(mem_cache->on_device[0]);
219+
ASSERT_TRUE(mem_cache->on_device[1]);
220+
ASSERT_EQ(mem_cache->NumDevicePages(), 2);
221+
}
222+
}
160223
} // namespace xgboost::data

0 commit comments

Comments
 (0)