Skip to content

Commit 628d1d2

Browse files
authored
fix: should not use array contains to implement inlist runtime filter (#18574)
* fix * add ut * fix * fix
1 parent 21be76e commit 628d1d2

File tree

2 files changed

+138
-8
lines changed

2 files changed

+138
-8
lines changed

src/query/service/src/pipelines/processors/transforms/hash_join/runtime_filter/convert.rs

Lines changed: 134 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@ use databend_common_catalog::runtime_filter_info::RuntimeFilterInfo;
1919
use databend_common_exception::ErrorCode;
2020
use databend_common_exception::Result;
2121
use databend_common_expression::type_check;
22+
use databend_common_expression::types::DataType;
2223
use databend_common_expression::types::NumberDomain;
2324
use databend_common_expression::types::NumberScalar;
2425
use databend_common_expression::Column;
26+
use databend_common_expression::Constant;
2527
use databend_common_expression::Domain;
2628
use databend_common_expression::Expr;
2729
use databend_common_expression::RawExpr;
@@ -71,6 +73,13 @@ pub fn build_runtime_filter_infos(
7173
}
7274

7375
fn build_inlist_filter(inlist: Column, probe_key: &Expr<String>) -> Result<Expr<String>> {
76+
if inlist.len() == 0 {
77+
return Ok(Expr::Constant(Constant {
78+
span: None,
79+
scalar: Scalar::Boolean(false),
80+
data_type: DataType::Boolean,
81+
}));
82+
}
7483
let probe_key = probe_key.as_column_ref().unwrap();
7584

7685
let raw_probe_key = RawExpr::ColumnRef {
@@ -79,20 +88,47 @@ fn build_inlist_filter(inlist: Column, probe_key: &Expr<String>) -> Result<Expr<
7988
data_type: probe_key.data_type.clone(),
8089
display_name: probe_key.display_name.clone(),
8190
};
82-
let array = RawExpr::Constant {
91+
92+
let mut scalars = inlist.iter();
93+
94+
let first_scalar = scalars.next().unwrap();
95+
let first_constant = RawExpr::Constant {
8396
span: None,
84-
scalar: Scalar::Array(inlist),
97+
scalar: first_scalar.to_owned(),
8598
data_type: None,
8699
};
87-
88-
let args = vec![array, raw_probe_key];
89-
let contain_func = RawExpr::FunctionCall {
100+
let mut or_expr = RawExpr::FunctionCall {
90101
span: None,
91-
name: "contains".to_string(),
102+
name: "eq".to_string(),
92103
params: vec![],
93-
args,
104+
args: vec![raw_probe_key.clone(), first_constant],
94105
};
95-
let expr = type_check::check(&contain_func, &BUILTIN_FUNCTIONS)?;
106+
107+
for scalar in scalars {
108+
let constant_expr = RawExpr::Constant {
109+
span: None,
110+
scalar: scalar.to_owned(),
111+
data_type: None,
112+
};
113+
114+
let eq_expr = RawExpr::FunctionCall {
115+
span: None,
116+
name: "eq".to_string(),
117+
params: vec![],
118+
args: vec![raw_probe_key.clone(), constant_expr],
119+
};
120+
121+
or_expr = RawExpr::FunctionCall {
122+
span: None,
123+
name: "or".to_string(),
124+
params: vec![],
125+
args: vec![or_expr, eq_expr],
126+
};
127+
}
128+
129+
let final_expr = or_expr;
130+
131+
let expr = type_check::check(&final_expr, &BUILTIN_FUNCTIONS)?;
96132
Ok(expr)
97133
}
98134

@@ -188,3 +224,93 @@ fn build_bloom_filter(
188224
let filter = BinaryFuse16::try_from(&hashes_vec)?;
189225
Ok((probe_key.id.to_string(), filter))
190226
}
227+
228+
// Unit tests for `build_inlist_filter`: they check how the filter expression it
// returns behaves when constant-folded against different probe-key domains.
#[cfg(test)]
229+
mod tests {
230+
use std::collections::HashMap;
231+
232+
use databend_common_expression::types::DataType;
233+
use databend_common_expression::types::NumberDataType;
234+
use databend_common_expression::ColumnBuilder;
235+
use databend_common_expression::ColumnRef;
236+
use databend_common_expression::Constant;
237+
use databend_common_expression::ConstantFolder;
238+
use databend_common_expression::Domain;
239+
use databend_common_expression::Expr;
240+
use databend_common_expression::FunctionContext;
241+
use databend_common_expression::Scalar;
242+
use databend_common_functions::BUILTIN_FUNCTIONS;
243+
244+
use super::build_inlist_filter;
245+
246+
/// Builds an in-list filter over the Int32 values {1, 10} for probe key
/// `column_a`, then folds it with `ConstantFolder::fold_with_domain` under two
/// domains for `column_a`:
///
/// * `[2, 10]` contains the in-list value 10, so the folded expression must
///   not be a constant.
/// * `[2, 9]` contains neither 1 nor 10, so the folded expression must be the
///   constant `false`.
#[test]
247+
fn test_build_inlist_filter() {
248+
let func_ctx = FunctionContext::default();
249+
250+
// Create test column with values {1, 10}
251+
let data_type = DataType::Number(NumberDataType::Int32);
252+
let mut builder = ColumnBuilder::with_capacity(&data_type, 2);
253+
builder.push(Scalar::Number(1i32.into()).as_ref());
254+
builder.push(Scalar::Number(10i32.into()).as_ref());
255+
let inlist = builder.build();
256+
257+
// Create probe key expression: column_a
258+
let probe_key = Expr::ColumnRef(ColumnRef {
259+
span: None,
260+
id: "column_a".to_string(),
261+
data_type: data_type.clone(),
262+
display_name: "column_a".to_string(),
263+
});
264+
265+
// Build the filter expression
266+
let filter_expr = build_inlist_filter(inlist, &probe_key).unwrap();
267+
268+
// Test with ConstantFolder - case where column_a is in the range [2,10].
// The range includes the in-list value 10, so the filter must NOT be
// folded to a constant.
269+
let mut input_domains = HashMap::new();
270+
let domain_value_2_10 = Domain::from_min_max(
271+
Scalar::Number(2i32.into()),
272+
Scalar::Number(10i32.into()),
273+
&data_type,
274+
);
275+
input_domains.insert("column_a".to_string(), domain_value_2_10);
276+
277+
// Only the folded expression is inspected; the second tuple element is ignored.
let (folded_expr, _) = ConstantFolder::fold_with_domain(
278+
&filter_expr,
279+
&input_domains,
280+
&func_ctx,
281+
&BUILTIN_FUNCTIONS,
282+
);
283+
284+
// Verify it's not folded to constant
285+
assert!(folded_expr.as_constant().is_none());
286+
287+
// Test with ConstantFolder - case where column_a is in the range [2,9]
// (should fold to constant false)
288+
let mut input_domains_false = HashMap::new();
289+
let domain_value_2_9 = Domain::from_min_max(
290+
Scalar::Number(2i32.into()),
291+
Scalar::Number(9i32.into()),
292+
&data_type,
293+
);
294+
input_domains_false.insert("column_a".to_string(), domain_value_2_9);
295+
296+
let (folded_expr_false, _) = ConstantFolder::fold_with_domain(
297+
&filter_expr,
298+
&input_domains_false,
299+
&func_ctx,
300+
&BUILTIN_FUNCTIONS,
301+
);
302+
303+
// Range [2,9] does not intersect with {1, 10}, so it should fold to constant false
304+
match folded_expr_false {
305+
Expr::Constant(Constant {
306+
scalar: Scalar::Boolean(false),
307+
..
308+
}) => {
309+
println!("✓ Test passed: column_a in [2,9] correctly evaluated to false");
310+
}
311+
// Any other expression (including a constant other than `false`) fails the test.
_ => {
312+
panic!("Expected constant false, got: {:?}", folded_expr_false);
313+
}
314+
}
315+
}
316+
}

src/query/storages/fuse/src/pruning/expr_runtime_pruner.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ impl ExprRuntimePruner {
5757

5858
let part = FuseBlockPartInfo::from_part(part)?;
5959
let pruned = self.exprs.iter().any(|filter| {
60+
// If the filter is a constant false, we can prune the partition.
61+
if matches!(filter, Expr::Constant(Constant { scalar: Scalar::Boolean(false), .. })) {
62+
return true;
63+
}
6064
let column_refs = filter.column_refs();
6165
// Currently only support filter with one column (probe key).
6266
debug_assert!(column_refs.len() == 1);

0 commit comments

Comments
 (0)