Skip to content

Commit a170737

Browse files
authored
Add does not contain operator to string column filtering (#11349)
1 parent 6c651ab commit a170737

8 files changed

+312
-4
lines changed

crates/viewer/re_dataframe_ui/src/filters/string.rs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use arrow::datatypes::{DataType, Field};
1010
use datafusion::common::{Column, Result as DataFusionResult, exec_err};
1111
use datafusion::logical_expr::{
1212
ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, Expr, ScalarFunctionArgs,
13-
ScalarUDF, ScalarUDFImpl, Signature, TypeSignature, Volatility, col, lit,
13+
ScalarUDF, ScalarUDFImpl, Signature, TypeSignature, Volatility, col, lit, not,
1414
};
1515

1616
use re_ui::SyntaxHighlighting;
@@ -22,6 +22,7 @@ use super::{FilterUiAction, action_from_text_edit_response};
2222
pub enum StringOperator {
2323
#[default]
2424
Contains,
25+
DoesNotContain,
2526
StartsWith,
2627
EndsWith,
2728
}
@@ -30,14 +31,20 @@ impl std::fmt::Display for StringOperator {
3031
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
3132
match self {
3233
Self::Contains => "contains".fmt(f),
34+
Self::DoesNotContain => "does not contain".fmt(f),
3335
Self::StartsWith => "starts with".fmt(f),
3436
Self::EndsWith => "ends with".fmt(f),
3537
}
3638
}
3739
}
3840

3941
impl StringOperator {
40-
pub const ALL: &'static [Self] = &[Self::Contains, Self::StartsWith, Self::EndsWith];
42+
pub const ALL: &'static [Self] = &[
43+
Self::Contains,
44+
Self::DoesNotContain,
45+
Self::StartsWith,
46+
Self::EndsWith,
47+
];
4148
}
4249

4350
#[derive(Debug, Clone, Default, PartialEq, Eq)]
@@ -70,7 +77,17 @@ impl StringFilter {
7077
}
7178

7279
let udf = ScalarUDF::new_from_impl(StringFilterUdf::new(self));
73-
udf.call(vec![col(column.clone())])
80+
let expr = udf.call(vec![col(column.clone())]);
81+
82+
// The udf treats `DoesNotContain` in the same way as `Contains`, so we must apply an
83+
// outer `NOT … OR IS NULL` operation. This way, both operators yield complementary results.
84+
let apply_any_or_null_semantics = self.operator() == StringOperator::DoesNotContain;
85+
86+
if apply_any_or_null_semantics {
87+
not(expr.clone()).or(expr.is_null())
88+
} else {
89+
expr
90+
}
7491
}
7592

7693
pub fn popup_ui(
@@ -201,7 +218,8 @@ impl StringFilterUdf {
201218
};
202219

203220
match self.operator {
204-
StringOperator::Contains => {
221+
// Note: the reversed (ALL-or-none) semantics of `DoesNotContain` are applied at the expression level.
222+
StringOperator::Contains | StringOperator::DoesNotContain => {
205223
Ok(arrow::compute::contains(haystack_array, needle.as_ref())?)
206224
}
207225
StringOperator::StartsWith => Ok(arrow::compute::starts_with(

crates/viewer/re_dataframe_ui/tests/filter_tests.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,24 @@ async fn test_string_contains() {
620620
TestColumn::strings_nulls(),
621621
"nulls_ends_with_c"
622622
);
623+
624+
filter_snapshot!(
625+
FilterKind::String(StringFilter::new(
626+
StringOperator::DoesNotContain,
627+
"b".to_owned()
628+
)),
629+
TestColumn::strings(),
630+
"does_not_contain_b"
631+
);
632+
633+
filter_snapshot!(
634+
FilterKind::String(StringFilter::new(
635+
StringOperator::DoesNotContain,
636+
"b".to_owned()
637+
)),
638+
TestColumn::strings_nulls(),
639+
"nulls_does_not_contain_b"
640+
);
623641
}
624642

625643
#[tokio::test]
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
---
2+
source: crates/viewer/re_dataframe_ui/tests/filter_tests.rs
3+
expression: test_results
4+
---
5+
TestResult {
6+
op: String(
7+
StringFilter {
8+
operator: DoesNotContain,
9+
query: "b",
10+
},
11+
),
12+
field: Field {
13+
name: "column",
14+
data_type: Utf8,
15+
nullable: false,
16+
dict_id: 0,
17+
dict_is_ordered: false,
18+
metadata: {},
19+
},
20+
unfiltered: StringArray
21+
[
22+
"a",
23+
"b",
24+
"c",
25+
"ab",
26+
"A B",
27+
"aBc",
28+
],
29+
filtered: StringArray
30+
[
31+
"a",
32+
"c",
33+
],
34+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
---
2+
source: crates/viewer/re_dataframe_ui/tests/filter_tests.rs
3+
expression: test_results
4+
---
5+
TestResult {
6+
op: String(
7+
StringFilter {
8+
operator: DoesNotContain,
9+
query: "b",
10+
},
11+
),
12+
field: Field {
13+
name: "column",
14+
data_type: Utf8,
15+
nullable: true,
16+
dict_id: 0,
17+
dict_is_ordered: false,
18+
metadata: {},
19+
},
20+
unfiltered: StringArray
21+
[
22+
"a",
23+
"b",
24+
null,
25+
"ab",
26+
"A B",
27+
"aBc",
28+
],
29+
filtered: StringArray
30+
[
31+
"a",
32+
null,
33+
],
34+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
---
2+
source: crates/viewer/re_dataframe_ui/tests/filter_tests.rs
3+
expression: test_results
4+
---
5+
TestResult {
6+
op: String(
7+
StringFilter {
8+
operator: DoesNotContain,
9+
query: "ab",
10+
},
11+
),
12+
field: Field {
13+
name: "column",
14+
data_type: List(
15+
Field {
16+
name: "item",
17+
data_type: Utf8,
18+
nullable: true,
19+
dict_id: 0,
20+
dict_is_ordered: false,
21+
metadata: {},
22+
},
23+
),
24+
nullable: true,
25+
dict_id: 0,
26+
dict_is_ordered: false,
27+
metadata: {},
28+
},
29+
unfiltered: ListArray
30+
[
31+
StringArray
32+
[
33+
"hello_ab",
34+
"a",
35+
],
36+
null,
37+
StringArray
38+
[
39+
"ab",
40+
null,
41+
],
42+
],
43+
filtered: ListArray
44+
[
45+
null,
46+
],
47+
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
---
2+
source: crates/viewer/re_dataframe_ui/tests/filter_tests.rs
3+
expression: test_results
4+
---
5+
TestResult {
6+
op: String(
7+
StringFilter {
8+
operator: DoesNotContain,
9+
query: "ab",
10+
},
11+
),
12+
field: Field {
13+
name: "column",
14+
data_type: List(
15+
Field {
16+
name: "item",
17+
data_type: Utf8,
18+
nullable: true,
19+
dict_id: 0,
20+
dict_is_ordered: false,
21+
metadata: {},
22+
},
23+
),
24+
nullable: false,
25+
dict_id: 0,
26+
dict_is_ordered: false,
27+
metadata: {},
28+
},
29+
unfiltered: ListArray
30+
[
31+
StringArray
32+
[
33+
"hello_ab",
34+
"a",
35+
],
36+
StringArray
37+
[
38+
"b",
39+
null,
40+
],
41+
StringArray
42+
[
43+
"ab",
44+
null,
45+
],
46+
],
47+
filtered: ListArray
48+
[
49+
StringArray
50+
[
51+
"b",
52+
null,
53+
],
54+
],
55+
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
---
2+
source: crates/viewer/re_dataframe_ui/tests/filter_tests.rs
3+
expression: test_results
4+
---
5+
TestResult {
6+
op: String(
7+
StringFilter {
8+
operator: DoesNotContain,
9+
query: "ab",
10+
},
11+
),
12+
field: Field {
13+
name: "column",
14+
data_type: List(
15+
Field {
16+
name: "item",
17+
data_type: Utf8,
18+
nullable: false,
19+
dict_id: 0,
20+
dict_is_ordered: false,
21+
metadata: {},
22+
},
23+
),
24+
nullable: false,
25+
dict_id: 0,
26+
dict_is_ordered: false,
27+
metadata: {},
28+
},
29+
unfiltered: ListArray
30+
[
31+
StringArray
32+
[
33+
"hello_ab",
34+
"a",
35+
],
36+
StringArray
37+
[
38+
"b",
39+
"c",
40+
],
41+
StringArray
42+
[
43+
"ab",
44+
"A B",
45+
],
46+
],
47+
filtered: ListArray
48+
[
49+
StringArray
50+
[
51+
"b",
52+
"c",
53+
],
54+
],
55+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
---
2+
source: crates/viewer/re_dataframe_ui/tests/filter_tests.rs
3+
expression: test_results
4+
---
5+
TestResult {
6+
op: String(
7+
StringFilter {
8+
operator: DoesNotContain,
9+
query: "ab",
10+
},
11+
),
12+
field: Field {
13+
name: "column",
14+
data_type: List(
15+
Field {
16+
name: "item",
17+
data_type: Utf8,
18+
nullable: false,
19+
dict_id: 0,
20+
dict_is_ordered: false,
21+
metadata: {},
22+
},
23+
),
24+
nullable: true,
25+
dict_id: 0,
26+
dict_is_ordered: false,
27+
metadata: {},
28+
},
29+
unfiltered: ListArray
30+
[
31+
StringArray
32+
[
33+
"hello_ab",
34+
"a",
35+
],
36+
null,
37+
StringArray
38+
[
39+
"ab",
40+
"A B",
41+
],
42+
],
43+
filtered: ListArray
44+
[
45+
null,
46+
],
47+
}

0 commit comments

Comments
 (0)