Skip to content

Commit 98ca256

Browse files
authored
[Enhancement](file-cache) Add fine-grained control for compaction file cache (#60609)
Support selective caching of index files only during compaction in cloud mode. Changes: - Add two mBool configs to control index-only caching for base and cumulative compaction: * enable_file_cache_write_base_compaction_index_only (default: false) * enable_file_cache_write_cumu_compaction_index_only (default: false) - Extend RowsetWriterContext with compaction_output_write_index_only field to mark whether only index files should be cached - Modify get_file_writer_options() to accept is_index_file parameter: * When compaction_output_write_index_only=true and is_index_file=false, set write_file_cache=false to skip caching data files * Index files continue to be cached normally - Update file writer creation call sites to pass is_index_file parameter: * Index file writers: pass true * Segment (data) file writers: pass false Benefits: - Reduces cache pressure by avoiding caching large data files during compaction - Preserves index file caching for query performance - Provides separate control for base and cumulative compaction strategies - Maintains backward compatibility with default settings ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: ### Release note None ### Check List (For Author) - Test <!-- At least one of them must be included. --> - [ ] Regression test - [x] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason <!-- Add your reason? --> - Behavior changed: - [x] No. - [ ] Yes. <!-- Explain the behavior change --> - Does this need documentation? - [x] No. - [ ] Yes. <!-- Add document PR link here. 
eg: apache/doris-website#1214 --> ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label <!-- Add branch pick label that this PR should merge into -->
1 parent 3266935 commit 98ca256

File tree

7 files changed

+469
-14
lines changed

7 files changed

+469
-14
lines changed

be/src/cloud/config.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,5 +155,8 @@ DEFINE_mBool(enable_cache_read_from_peer, "true");
155155
// If the value is -1, use the `rehash_tablet_after_be_dead_seconds` setting in the `fe` configuration as the expiration time.
156156
DEFINE_mInt64(cache_read_from_peer_expired_seconds, "-1");
157157

158+
DEFINE_mBool(enable_file_cache_write_base_compaction_index_only, "false");
159+
DEFINE_mBool(enable_file_cache_write_cumu_compaction_index_only, "false");
160+
158161
#include "common/compile_check_end.h"
159162
} // namespace doris::config

be/src/cloud/config.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,5 +195,11 @@ DECLARE_mBool(enable_cache_read_from_peer);
195195

196196
DECLARE_mInt64(cache_read_from_peer_expired_seconds);
197197

198+
// Base compaction output: only write index files to file cache, not data files
199+
DECLARE_mBool(enable_file_cache_write_base_compaction_index_only);
200+
201+
// Cumulative compaction output: only write index files to file cache, not data files
202+
DECLARE_mBool(enable_file_cache_write_cumu_compaction_index_only);
203+
198204
#include "common/compile_check_end.h"
199205
} // namespace doris::config

be/src/olap/compaction.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,32 @@ using std::vector;
8888

8989
namespace doris {
9090
using namespace ErrorCode;
91+
92+
// Decide whether compaction output should run in index-only file cache mode.
93+
// The result is a pure function of the four arguments; no global state is read here:
94+
// - write_file_cache: whether the output is written to file cache at all; if false the result is false
95+
// - compaction_type: only READER_BASE_COMPACTION and READER_CUMULATIVE_COMPACTION can yield true
96+
// - enable_base_index_only: index-only switch consulted when compaction_type is base compaction
97+
// - enable_cumu_index_only: index-only switch consulted when compaction_type is cumulative compaction
98+
// Returns true if index-only mode should be enabled; any other combination returns false.
99+
bool should_enable_compaction_cache_index_only(bool write_file_cache, ReaderType compaction_type,
100+
bool enable_base_index_only,
101+
bool enable_cumu_index_only) {
102+
if (!write_file_cache) { // file cache writing is off entirely, so index-only mode does not apply
103+
return false;
104+
}
105+
106+
if (compaction_type == ReaderType::READER_BASE_COMPACTION && enable_base_index_only) {
107+
return true;
108+
}
109+
110+
if (compaction_type == ReaderType::READER_CUMULATIVE_COMPACTION && enable_cumu_index_only) {
111+
return true;
112+
}
113+
114+
return false; // other compaction types, or the matching switch is off
115+
}
116+
91117
namespace {
92118
#include "common/compile_check_begin.h"
93119

@@ -1749,6 +1775,13 @@ Status CloudCompactionMixin::construct_output_rowset_writer(RowsetWriterContext&
17491775
ctx.write_file_cache = should_cache_compaction_output();
17501776
ctx.file_cache_ttl_sec = _tablet->ttl_seconds();
17511777
ctx.approximate_bytes_to_write = _input_rowsets_total_size;
1778+
1779+
// Set fine-grained control: only write index files to cache if configured
1780+
ctx.compaction_output_write_index_only = should_enable_compaction_cache_index_only(
1781+
ctx.write_file_cache, compaction_type(),
1782+
config::enable_file_cache_write_base_compaction_index_only,
1783+
config::enable_file_cache_write_cumu_compaction_index_only);
1784+
17521785
ctx.tablet = _tablet;
17531786
ctx.job_id = _uuid;
17541787

be/src/olap/rowset/beta_rowset_writer.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,8 +1050,9 @@ Status BaseBetaRowsetWriter::_build_tmp(RowsetSharedPtr& rowset_ptr) {
10501050
}
10511051

10521052
Status BaseBetaRowsetWriter::_create_file_writer(const std::string& path,
1053-
io::FileWriterPtr& file_writer) {
1054-
io::FileWriterOptions opts = _context.get_file_writer_options();
1053+
io::FileWriterPtr& file_writer,
1054+
bool is_index_file) {
1055+
io::FileWriterOptions opts = _context.get_file_writer_options(is_index_file);
10551056
Status st = _context.fs()->create_file(path, &file_writer, &opts);
10561057
if (!st.ok()) {
10571058
LOG(WARNING) << "failed to create writable file. path=" << path << ", err: " << st;
@@ -1069,9 +1070,9 @@ Status BaseBetaRowsetWriter::create_file_writer(uint32_t segment_id, io::FileWri
10691070
std::string prefix =
10701071
std::string {InvertedIndexDescriptor::get_index_file_path_prefix(segment_path)};
10711072
std::string index_path = InvertedIndexDescriptor::get_index_file_path_v2(prefix);
1072-
return _create_file_writer(index_path, file_writer);
1073+
return _create_file_writer(index_path, file_writer, true /* is_index_file */);
10731074
} else if (file_type == FileType::SEGMENT_FILE) {
1074-
return _create_file_writer(segment_path, file_writer);
1075+
return _create_file_writer(segment_path, file_writer, false /* is_index_file */);
10751076
}
10761077
return Status::Error<ErrorCode::INTERNAL_ERROR>(
10771078
fmt::format("failed to create file = {}, file type = {}", segment_path, file_type));
@@ -1081,7 +1082,8 @@ Status BaseBetaRowsetWriter::create_index_file_writer(uint32_t segment_id,
10811082
IndexFileWriterPtr* index_file_writer) {
10821083
RETURN_IF_ERROR(RowsetWriter::create_index_file_writer(segment_id, index_file_writer));
10831084
// used for inverted index format v1
1084-
(*index_file_writer)->set_file_writer_opts(_context.get_file_writer_options());
1085+
(*index_file_writer)
1086+
->set_file_writer_opts(_context.get_file_writer_options(true /* is_index_file */));
10851087
return Status::OK();
10861088
}
10871089

@@ -1091,7 +1093,7 @@ Status BetaRowsetWriter::create_segment_writer_for_segcompaction(
10911093
std::string path = BetaRowset::local_segment_path_segcompacted(_context.tablet_path,
10921094
_context.rowset_id, begin, end);
10931095
io::FileWriterPtr file_writer;
1094-
RETURN_IF_ERROR(_create_file_writer(path, file_writer));
1096+
RETURN_IF_ERROR(_create_file_writer(path, file_writer, false /* is_index_file */));
10951097

10961098
IndexFileWriterPtr index_file_writer;
10971099
if (_context.tablet_schema->has_inverted_index() || _context.tablet_schema->has_ann_index()) {
@@ -1100,7 +1102,8 @@ Status BetaRowsetWriter::create_segment_writer_for_segcompaction(
11001102
if (_context.tablet_schema->get_inverted_index_storage_format() !=
11011103
InvertedIndexStorageFormatPB::V1) {
11021104
std::string index_path = InvertedIndexDescriptor::get_index_file_path_v2(prefix);
1103-
RETURN_IF_ERROR(_create_file_writer(index_path, idx_file_writer));
1105+
RETURN_IF_ERROR(
1106+
_create_file_writer(index_path, idx_file_writer, true /* is_index_file */));
11041107
}
11051108
index_file_writer = std::make_unique<IndexFileWriter>(
11061109
_context.fs(), prefix, _context.rowset_id.to_string(), _num_segcompacted,

be/src/olap/rowset/beta_rowset_writer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,8 @@ class BaseBetaRowsetWriter : public RowsetWriter {
202202
protected:
203203
Status _generate_delete_bitmap(int32_t segment_id);
204204
virtual Status _build_rowset_meta(RowsetMeta* rowset_meta, bool check_segment_num = false);
205-
Status _create_file_writer(const std::string& path, io::FileWriterPtr& file_writer);
205+
Status _create_file_writer(const std::string& path, io::FileWriterPtr& file_writer,
206+
bool is_index_file = false);
206207
virtual Status _close_file_writers();
207208
virtual Status _check_segment_number_limit(size_t segnum);
208209
virtual int64_t _num_seg() const;

be/src/olap/rowset/rowset_writer_context.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ struct RowsetWriterContext {
106106
bool is_hot_data = false;
107107
uint64_t file_cache_ttl_sec = 0;
108108
uint64_t approximate_bytes_to_write = 0;
109+
// If true, compaction output only writes index files to file cache, not data files
110+
bool compaction_output_write_index_only = false;
109111
/// end file cache opts
110112

111113
// segcompaction for this RowsetWriter, only enabled when importing data
@@ -234,13 +236,17 @@ struct RowsetWriterContext {
234236

235237
io::FileSystem& fs_ref() const { return *fs(); }
236238

237-
io::FileWriterOptions get_file_writer_options() {
238-
io::FileWriterOptions opts {.write_file_cache = write_file_cache,
239-
.is_cold_data = is_hot_data,
240-
.file_cache_expiration_time = file_cache_ttl_sec,
241-
.approximate_bytes_to_write = approximate_bytes_to_write};
239+
io::FileWriterOptions get_file_writer_options(bool is_index_file = false) {
240+
bool should_write_cache = write_file_cache;
241+
// If configured to only write index files to cache, skip cache for data files
242+
if (compaction_output_write_index_only && !is_index_file) {
243+
should_write_cache = false;
244+
}
242245

243-
return opts;
246+
return io::FileWriterOptions {.write_file_cache = should_write_cache,
247+
.is_cold_data = is_hot_data,
248+
.file_cache_expiration_time = file_cache_ttl_sec,
249+
.approximate_bytes_to_write = approximate_bytes_to_write};
244250
}
245251
};
246252

0 commit comments

Comments
 (0)