Skip to content

Commit 7d4aa18

Browse files
committed
[feature](scan) Implement BE-side value predicate pushdown for MOR tables
Enable value column predicates to be pushed down to storage layer for MOR (Merge-On-Read) tables when controlled by the session variable enable_mor_value_predicate_pushdown_tables. This allows inverted indexes on value columns to be utilized for filtering, improving query performance on dedup-only/insert-only MOR workloads. Key changes: - Propagate enable_mor_value_predicate_pushdown flag from thrift scan node through OlapScanner -> ReaderParams -> RowsetReaderContext -> BetaRowsetReader - Extend _should_push_down_value_predicates() to push value predicates for all rowsets when the flag is set - Skip __DORIS_DELETE_SIGN__ predicate during per-segment pushdown to prevent delete markers from being filtered before merge - Revert scan_operator.cpp to only remove VExpr from conjuncts for key columns, preserving VExpr as post-merge safety net for value columns - Add regression tests covering dedup, delete-sign, and delete-predicate scenarios
1 parent a06783e commit 7d4aa18

File tree

8 files changed

+245
-45
lines changed

8 files changed

+245
-45
lines changed

be/src/olap/rowset/beta_rowset_reader.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,8 @@ bool BetaRowsetReader::_should_push_down_value_predicates() const {
356356
(((_rowset->start_version() == 0 || _rowset->start_version() == 2) &&
357357
!_rowset->_rowset_meta->is_segments_overlapping() &&
358358
_read_context->sequence_id_idx == -1) ||
359-
_read_context->enable_unique_key_merge_on_write);
359+
_read_context->enable_unique_key_merge_on_write ||
360+
_read_context->enable_mor_value_predicate_pushdown);
360361
}
361362
#include "common/compile_check_end.h"
362363
} // namespace doris

be/src/olap/rowset/rowset_reader_context.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,9 @@ struct RowsetReaderContext {
100100
std::shared_ptr<segment_v2::AnnTopNRuntime> ann_topn_runtime;
101101

102102
uint64_t condition_cache_digest = 0;
103+
104+
// When true, push down value predicates for MOR tables
105+
bool enable_mor_value_predicate_pushdown = false;
103106
};
104107

105108
} // namespace doris

be/src/olap/tablet_reader.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,8 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) {
208208
_reader_context.merged_rows = &_merged_rows;
209209
_reader_context.delete_bitmap = read_params.delete_bitmap;
210210
_reader_context.enable_unique_key_merge_on_write = tablet()->enable_unique_key_merge_on_write();
211+
_reader_context.enable_mor_value_predicate_pushdown =
212+
read_params.enable_mor_value_predicate_pushdown;
211213
_reader_context.record_rowids = read_params.record_rowids;
212214
_reader_context.rowid_conversion = read_params.rowid_conversion;
213215
_reader_context.is_key_column_group = read_params.is_key_column_group;
@@ -515,9 +517,17 @@ Status TabletReader::_init_conditions_param(const ReaderParams& read_params) {
515517
}
516518
}
517519

520+
int32_t delete_sign_idx = _tablet_schema->delete_sign_idx();
518521
for (auto predicate : predicates) {
519522
auto column = _tablet_schema->column(predicate->column_id());
520523
if (column.aggregation() != FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) {
524+
// When MOR value predicate pushdown is enabled, drop __DORIS_DELETE_SIGN__
525+
// from storage-layer predicates entirely. Delete sign must only be evaluated
526+
// post-merge via VExpr to prevent deleted rows from reappearing.
527+
if (_reader_context.enable_mor_value_predicate_pushdown && delete_sign_idx >= 0 &&
528+
predicate->column_id() == static_cast<uint32_t>(delete_sign_idx)) {
529+
continue;
530+
}
521531
_value_col_predicates.push_back(predicate);
522532
} else {
523533
_col_predicates.push_back(predicate);

be/src/olap/tablet_reader.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,9 @@ class TabletReader {
191191

192192
bool is_segcompaction = false;
193193

194+
// Enable value predicate pushdown for MOR tables
195+
bool enable_mor_value_predicate_pushdown = false;
196+
194197
std::vector<RowwiseIteratorUPtr>* segment_iters_ptr = nullptr;
195198

196199
void check_validation() const;

be/src/pipeline/exec/scan_operator.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -439,8 +439,7 @@ Status ScanLocalState<Derived>::_normalize_predicate(vectorized::VExprContext* c
439439
return Status::OK();
440440
}
441441

442-
if (pdt == PushDownType::ACCEPTABLE &&
443-
(_is_key_column(slot->col_name()) || _should_push_down_mor_value_predicate())) {
442+
if (pdt == PushDownType::ACCEPTABLE && _is_key_column(slot->col_name())) {
444443
output_expr = nullptr;
445444
return Status::OK();
446445
} else {

be/src/vec/exec/scan/olap_scanner.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,13 @@ Status OlapScanner::_init_tablet_reader_params(
480480
if (!_state->skip_storage_engine_merge()) {
481481
auto* olap_scan_local_state = (pipeline::OlapScanLocalState*)_local_state;
482482
TOlapScanNode& olap_scan_node = olap_scan_local_state->olap_scan_node();
483+
484+
// Set MOR value predicate pushdown flag
485+
if (olap_scan_node.__isset.enable_mor_value_predicate_pushdown &&
486+
olap_scan_node.enable_mor_value_predicate_pushdown) {
487+
_tablet_reader_params.enable_mor_value_predicate_pushdown = true;
488+
}
489+
483490
// order by table keys optimization for topn
484491
// will only read head/tail of data file since it's already sorted by keys
485492
if (olap_scan_node.__isset.sort_info && !olap_scan_node.sort_info.is_asc_order.empty()) {

regression-test/data/data_model_p0/unique/test_mor_value_predicate_pushdown.out

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,69 @@
11
-- This file is automatically generated. You should know what you did if you want to edit this
22
-- !select_disabled --
3+
2 200 world
34
3 300 test
45

56
-- !select_enabled_tablename --
7+
2 200 world
68
3 300 test
79

810
-- !select_enabled_fullname --
11+
2 200 world
912
3 300 test
1013

1114
-- !select_enabled_wildcard --
15+
2 200 world
1216
3 300 test
1317

14-
-- !select_deleted_row --
18+
-- !select_eq_predicate --
19+
2 200 world
1520

1621
-- !select_not_in_list --
22+
2 200 world
1723
3 300 test
1824

19-
-- !select_latest_version --
20-
1 200 second
25+
-- !select_dedup_all --
26+
1 100 first
2127
2 300 third
28+
3 500 fifth
2229

23-
-- !select_old_version --
30+
-- !select_dedup_eq --
31+
2 300 third
32+
33+
-- !select_dedup_none --
34+
35+
-- !select_delete_range --
36+
3 300 test
37+
38+
-- !select_delete_eq --
39+
40+
-- !select_delete_all --
41+
1 100 hello
42+
3 300 test
43+
44+
-- !select_delpred_range --
45+
3 300 test
46+
47+
-- !select_delpred_eq --
48+
49+
-- !select_delpred_all --
50+
1 100 hello
51+
3 300 test
52+
53+
-- !select_update_disabled_old --
54+
55+
-- !select_update_disabled_new --
56+
1 500 new
57+
58+
-- !select_update_enabled_old --
59+
1 100 old
60+
61+
-- !select_update_enabled_new --
62+
1 500 new
2463

25-
-- !select_new_version --
26-
1 200 second
64+
-- !select_update_enabled_range --
65+
1 500 new
66+
3 300 keep
2767

2868
-- !select_multiple_tables --
2969
2 200

0 commit comments

Comments
 (0)