Skip to content

Commit 633eaee

Browse files
authored
branch-4.0: [opt](memory) set source content column by column when flush memtable for partial update (#59547)
pick #58782 ### What problem does this PR solve? Related issue: #58780 When loading into a 5000-column wide table, flushing the memtable consumes a lot of memory: <img width="526" height="430" alt="image" src="https://github.com/user-attachments/assets/816bcc8b-d9d0-4105-a96b-3fbcc931b0b1" /> To solve this problem, set the source content column by column when flushing the memtable for partial update. After this optimization, the memory usage of `_append_block_with_partial_content` is hardly visible in the profile. ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: ### Release note None ### Check List (For Author) - Test <!-- At least one of them must be included. --> - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason <!-- Add your reason? --> - Behavior changed: - [ ] No. - [ ] Yes. <!-- Explain the behavior change --> - Does this need documentation? - [ ] No. - [ ] Yes. <!-- Add document PR link here. eg: apache/doris-website#1214 --> ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label <!-- Add branch pick label that this PR should merge into -->
1 parent ef65dce commit 633eaee

File tree

1 file changed

+20
-4
lines changed

1 file changed

+20
-4
lines changed

be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -528,15 +528,15 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da
528528
for (auto i : including_cids) {
529529
full_block.replace_by_position(i, data.block->get_by_position(input_id++).column);
530530
}
531-
RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_columns(
532-
&full_block, data.row_pos, data.num_rows, including_cids));
533531

534532
bool have_input_seq_column = false;
535533
// write including columns
536534
std::vector<vectorized::IOlapColumnDataAccessor*> key_columns;
537535
vectorized::IOlapColumnDataAccessor* seq_column = nullptr;
538536
uint32_t segment_start_pos = 0;
539537
for (auto cid : including_cids) {
538+
RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_columns(
539+
&full_block, data.row_pos, data.num_rows, std::vector<uint32_t> {cid}));
540540
// here we get segment column row num before append data.
541541
segment_start_pos = cast_set<uint32_t>(_column_writers[cid]->get_next_rowid());
542542
// olap data convertor always starts from id = 0
@@ -554,6 +554,15 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da
554554
RETURN_IF_ERROR(_column_writers[cid]->append(column->get_nullmap(), column->get_data(),
555555
data.num_rows));
556556
RETURN_IF_ERROR(_finalize_column_writer_and_update_meta(cid));
557+
// Don't clear source content for key columns and sequence column here,
558+
// as they will be used later in _full_encode_keys() and _generate_primary_key_index().
559+
// They will be cleared at the end of this method.
560+
bool is_key_column = (cid < _num_sort_key_columns);
561+
bool is_seq_column = (_tablet_schema->has_sequence_col() &&
562+
cid == _tablet_schema->sequence_col_idx() && have_input_seq_column);
563+
if (!is_key_column && !is_seq_column) {
564+
_olap_data_convertor->clear_source_content(cid);
565+
}
557566
}
558567

559568
bool has_default_or_nullable = false;
@@ -629,9 +638,9 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da
629638

630639
// convert missing columns and send to column writer
631640
const auto& missing_cids = _opts.rowset_ctx->partial_update_info->missing_cids;
632-
RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_columns(
633-
&full_block, data.row_pos, data.num_rows, missing_cids));
634641
for (auto cid : missing_cids) {
642+
RETURN_IF_ERROR(_olap_data_convertor->set_source_content_with_specifid_columns(
643+
&full_block, data.row_pos, data.num_rows, std::vector<uint32_t> {cid}));
635644
auto [status, column] = _olap_data_convertor->convert_column_data(cid);
636645
if (!status.ok()) {
637646
return status;
@@ -644,6 +653,13 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da
644653
RETURN_IF_ERROR(_column_writers[cid]->append(column->get_nullmap(), column->get_data(),
645654
data.num_rows));
646655
RETURN_IF_ERROR(_finalize_column_writer_and_update_meta(cid));
656+
// Don't clear source content for sequence column here if it will be used later
657+
// in _generate_primary_key_index(). It will be cleared at the end of this method.
658+
bool is_seq_column = (_tablet_schema->has_sequence_col() && !have_input_seq_column &&
659+
cid == _tablet_schema->sequence_col_idx());
660+
if (!is_seq_column) {
661+
_olap_data_convertor->clear_source_content(cid);
662+
}
647663
}
648664

649665
_num_rows_updated += stats.num_rows_updated;

0 commit comments

Comments
 (0)