Skip to content

Commit afe673b

Browse files
authored
feat(query): Vector index support filter pushdown (#18516)
* feat(query): Vector index support filter pushdown * add tests * add tests
1 parent 8b928fa commit afe673b

File tree

10 files changed

+846
-202
lines changed

10 files changed

+846
-202
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/query/ee/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ typetag = { workspace = true }
6666
uuid = { workspace = true }
6767

6868
[dev-dependencies]
69+
databend-common-functions = { workspace = true }
6970
jsonb = { workspace = true }
7071
tantivy = { workspace = true }
7172

src/query/ee/tests/it/vector_index/pruning.rs

Lines changed: 302 additions & 30 deletions
Large diffs are not rendered by default.

src/query/sql/src/planner/optimizer/optimizers/rule/factory.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ use crate::optimizer::optimizers::rule::RulePushDownLimitWindow;
5252
use crate::optimizer::optimizers::rule::RulePushDownPrewhere;
5353
use crate::optimizer::optimizers::rule::RulePushDownRankLimitAggregate;
5454
use crate::optimizer::optimizers::rule::RulePushDownSortEvalScalar;
55+
use crate::optimizer::optimizers::rule::RulePushDownSortFilterScan;
5556
use crate::optimizer::optimizers::rule::RulePushDownSortScan;
5657
use crate::optimizer::optimizers::rule::RuleSemiToInnerJoin;
5758
use crate::optimizer::optimizers::rule::RuleSplitAggregate;
@@ -79,6 +80,7 @@ impl RuleFactory {
7980
RuleID::PushDownLimitUnion => Ok(Box::new(RulePushDownLimitUnion::new())),
8081
RuleID::PushDownLimitScan => Ok(Box::new(RulePushDownLimitScan::new())),
8182
RuleID::PushDownSortScan => Ok(Box::new(RulePushDownSortScan::new())),
83+
RuleID::PushDownSortFilterScan => Ok(Box::new(RulePushDownSortFilterScan::new())),
8284
RuleID::PushDownSortEvalScalar => {
8385
Ok(Box::new(RulePushDownSortEvalScalar::new(metadata)))
8486
}

src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ mod rule_push_down_filter_window;
2626
mod rule_push_down_filter_window_top_n;
2727
mod rule_push_down_prewhere;
2828
mod rule_push_down_sort_expression;
29+
mod rule_push_down_sort_filter_scan;
2930
mod rule_push_down_sort_scan;
3031

3132
pub use rule_eliminate_filter::RuleEliminateFilter;
@@ -42,4 +43,5 @@ pub use rule_push_down_filter_window::RulePushDownFilterWindow;
4243
pub use rule_push_down_filter_window_top_n::RulePushDownFilterWindowTopN;
4344
pub use rule_push_down_prewhere::RulePushDownPrewhere;
4445
pub use rule_push_down_sort_expression::RulePushDownSortEvalScalar;
46+
pub use rule_push_down_sort_filter_scan::RulePushDownSortFilterScan;
4547
pub use rule_push_down_sort_scan::RulePushDownSortScan;

src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/rule_push_down_prewhere.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@ use crate::IndexType;
3838
use crate::MetadataRef;
3939
use crate::Visibility;
4040

41+
/// Input: Filter
42+
/// \
43+
/// Scan
44+
///
45+
/// Output:
46+
/// Filter
47+
/// \
48+
/// Scan(padding prewhere)
4149
pub struct RulePushDownPrewhere {
4250
id: RuleID,
4351
matchers: Vec<Matcher>,
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use std::sync::Arc;
16+
17+
use databend_common_exception::Result;
18+
19+
use crate::optimizer::ir::Matcher;
20+
use crate::optimizer::ir::SExpr;
21+
use crate::optimizer::optimizers::rule::Rule;
22+
use crate::optimizer::optimizers::rule::RuleID;
23+
use crate::optimizer::optimizers::rule::TransformResult;
24+
use crate::plans::Filter;
25+
use crate::plans::RelOp;
26+
use crate::plans::RelOperator;
27+
use crate::plans::Scan;
28+
use crate::plans::Sort;
29+
30+
/// Input:
31+
/// (1) Sort
32+
/// \
33+
/// Filter
34+
/// \
35+
/// Scan
36+
/// (2) Sort
37+
/// \
38+
/// EvalScalar
39+
/// \
40+
/// Filter
41+
/// \
42+
/// Scan
43+
///
44+
/// Output:
45+
/// (1) Sort
46+
/// \
47+
/// Filter
48+
/// \
49+
/// Scan(padding order_by and limit)
50+
/// (2) Sort
51+
/// \
52+
/// EvalScalar
53+
/// \
54+
/// Filter
55+
/// \
56+
/// Scan(padding order_by and limit)
57+
pub struct RulePushDownSortFilterScan {
58+
id: RuleID,
59+
matchers: Vec<Matcher>,
60+
}
61+
62+
impl RulePushDownSortFilterScan {
63+
pub fn new() -> Self {
64+
Self {
65+
id: RuleID::PushDownSortFilterScan,
66+
matchers: vec![
67+
Matcher::MatchOp {
68+
op_type: RelOp::Sort,
69+
children: vec![Matcher::MatchOp {
70+
op_type: RelOp::Filter,
71+
children: vec![Matcher::MatchOp {
72+
op_type: RelOp::Scan,
73+
children: vec![],
74+
}],
75+
}],
76+
},
77+
Matcher::MatchOp {
78+
op_type: RelOp::Sort,
79+
children: vec![Matcher::MatchOp {
80+
op_type: RelOp::EvalScalar,
81+
children: vec![Matcher::MatchOp {
82+
op_type: RelOp::Filter,
83+
children: vec![Matcher::MatchOp {
84+
op_type: RelOp::Scan,
85+
children: vec![],
86+
}],
87+
}],
88+
}],
89+
},
90+
],
91+
}
92+
}
93+
}
94+
95+
impl Rule for RulePushDownSortFilterScan {
96+
fn id(&self) -> RuleID {
97+
self.id
98+
}
99+
100+
fn apply(&self, s_expr: &SExpr, state: &mut TransformResult) -> Result<()> {
101+
let sort: Sort = s_expr.plan().clone().try_into()?;
102+
let child = s_expr.child(0)?;
103+
let (eval_scalar, filter, mut scan) = match child.plan() {
104+
RelOperator::Filter(filter) => {
105+
let grand_child = child.child(0)?;
106+
let scan: Scan = grand_child.plan().clone().try_into()?;
107+
(None, filter.clone(), scan)
108+
}
109+
RelOperator::EvalScalar(eval_scalar) => {
110+
let child = child.child(0)?;
111+
let filter: Filter = child.plan().clone().try_into()?;
112+
let grand_child = child.child(0)?;
113+
let scan: Scan = grand_child.plan().clone().try_into()?;
114+
(Some(eval_scalar.clone()), filter, scan)
115+
}
116+
_ => unreachable!(),
117+
};
118+
119+
// The following conditions must be met push down filter and sort for vector index:
120+
// 1. Scan must contain `vector_index`, because .
121+
// 2. The number of `push_down_predicates` in Scan must be the same as the number of `predicates`
122+
// in Filter to ensure that all filter conditions are pushed down.
123+
// (Filter `predicates` has been pushed down in `RulePushDownFilterScan` rule.)
124+
// 3. Sort must have limit in order to prune unused blocks.
125+
let push_down_predicates = scan.push_down_predicates.clone().unwrap_or_default();
126+
if scan.vector_index.is_none()
127+
|| push_down_predicates.len() != filter.predicates.len()
128+
|| sort.limit.is_none()
129+
{
130+
return Ok(());
131+
}
132+
133+
scan.order_by = Some(sort.items);
134+
scan.limit = sort.limit;
135+
136+
let new_scan = SExpr::create_leaf(Arc::new(RelOperator::Scan(scan)));
137+
138+
let mut result = if eval_scalar.is_some() {
139+
let grandchild = child.child(0)?;
140+
let new_filter = grandchild.replace_children(vec![Arc::new(new_scan)]);
141+
let new_eval_scalar = child.replace_children(vec![Arc::new(new_filter)]);
142+
s_expr.replace_children(vec![Arc::new(new_eval_scalar)])
143+
} else {
144+
let new_filter = child.replace_children(vec![Arc::new(new_scan)]);
145+
s_expr.replace_children(vec![Arc::new(new_filter)])
146+
};
147+
148+
result.set_applied_rule(&self.id);
149+
state.add_result(result);
150+
Ok(())
151+
}
152+
153+
fn matchers(&self) -> &[Matcher] {
154+
&self.matchers
155+
}
156+
}
157+
158+
impl Default for RulePushDownSortFilterScan {
159+
fn default() -> Self {
160+
Self::new()
161+
}
162+
}

src/query/sql/src/planner/optimizer/optimizers/rule/rule.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ pub static DEFAULT_REWRITE_RULES: LazyLock<Vec<RuleID>> = LazyLock::new(|| {
5858
RuleID::PushDownFilterScan,
5959
RuleID::PushDownPrewhere, /* PushDownPrwhere should be after all rules except PushDownFilterScan */
6060
RuleID::PushDownSortScan, // PushDownSortScan should be after PushDownPrewhere
61+
RuleID::PushDownSortFilterScan, // PushDownSortFilterScan should be after PushDownFilterScan
6162
RuleID::GroupingSetsToUnion,
6263
]
6364
});
@@ -107,6 +108,7 @@ pub enum RuleID {
107108
PushDownLimitScan,
108109
PushDownSortEvalScalar,
109110
PushDownSortScan,
111+
PushDownSortFilterScan,
110112
SemiToInnerJoin,
111113
EliminateEvalScalar,
112114
EliminateFilter,
@@ -148,6 +150,7 @@ impl Display for RuleID {
148150
RuleID::PushDownFilterAggregate => write!(f, "PushDownFilterAggregate"),
149151
RuleID::PushDownLimitScan => write!(f, "PushDownLimitScan"),
150152
RuleID::PushDownSortScan => write!(f, "PushDownSortScan"),
153+
RuleID::PushDownSortFilterScan => write!(f, "PushDownSortFilterScan"),
151154
RuleID::PushDownSortEvalScalar => write!(f, "PushDownSortEvalScalar"),
152155
RuleID::PushDownLimitWindow => write!(f, "PushDownLimitWindow"),
153156
RuleID::PushDownFilterWindow => write!(f, "PushDownFilterWindow"),

0 commit comments

Comments
 (0)