Skip to content

Commit a97f379

Browse files
authored
Provide error message when a pool has taken all memory. (dmlc#11173)
1 parent eb980fc commit a97f379

File tree

2 files changed

+7
-1
lines changed

2 files changed

+7
-1
lines changed

src/common/error_msg.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,5 +118,11 @@ constexpr StringView InconsistentFeatureTypes() {
118118
}
119119

120120
void CheckOldNccl(std::int32_t major, std::int32_t minor, std::int32_t patch);
121+
122+
// Error message for the case where no GPU memory is left. It asks whether RMM is in use
// (suggesting an XGBoost build with RMM support) and otherwise advises reserving a portion
// of the GPU memory for XGBoost when another kind of memory pool is used.
constexpr StringView ZeroCudaMemory() {
123+
return "No GPU memory is left, are you using RMM? If so, please install XGBoost with RMM "
124+
"support. If you are using other types of memory pool, please consider reserving a "
125+
"portion of the GPU memory for XGBoost.";
126+
}
121127
} // namespace xgboost::error
122128
#endif // XGBOOST_COMMON_ERROR_MSG_H_

src/common/hist_util.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,12 @@ bst_idx_t SketchBatchNumElements(bst_idx_t sketch_batch_num_elements, SketchShap
9898
return std::min(static_cast<bst_idx_t>(n_max_used_f32), shape.nnz);
9999
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
100100
(void)container_bytes; // We know the remaining size when RMM is not used.
101-
102101
if (sketch_batch_num_elements == detail::UnknownSketchNumElements()) {
103102
auto required_memory =
104103
RequiredMemory(shape.n_samples, shape.n_features, shape.nnz, num_cuts, has_weight);
105104
// use up to 80% of available space
106105
auto avail = dh::AvailableMemory(device) * 0.8;
106+
CHECK_GT(avail, 0) << error::ZeroCudaMemory();
107107
if (required_memory > avail) {
108108
sketch_batch_num_elements = avail / BytesPerElement(has_weight);
109109
} else {

0 commit comments

Comments
 (0)