Skip to content

Commit 5710c79

Browse files
authored
Add support for starts with and ends with to string filter (#11341)
1 parent 9869eae commit 5710c79

35 files changed

+1024
-244
lines changed

crates/viewer/re_dataframe_ui/src/filters/boolean.rs

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,7 @@ impl NonNullableBooleanFilter {
5151
self.as_bool().to_string()
5252
}
5353

54-
pub fn popup_ui(
55-
&mut self,
56-
ui: &mut egui::Ui,
57-
column_name: &str,
58-
action: &mut super::FilterUiAction,
59-
) {
54+
pub fn popup_ui(&mut self, ui: &mut egui::Ui, column_name: &str) -> FilterUiAction {
6055
super::basic_operation_ui(ui, column_name, "is");
6156

6257
let mut clicked = false;
@@ -70,7 +65,9 @@ impl NonNullableBooleanFilter {
7065
.clicked();
7166

7267
if clicked {
73-
*action = FilterUiAction::CommitStateToBlueprint;
68+
FilterUiAction::CommitStateToBlueprint
69+
} else {
70+
FilterUiAction::None
7471
}
7572
}
7673
}
@@ -139,12 +136,7 @@ impl NullableBooleanFilter {
139136
}
140137
}
141138

142-
pub fn popup_ui(
143-
&mut self,
144-
ui: &mut egui::Ui,
145-
column_name: &str,
146-
action: &mut super::FilterUiAction,
147-
) {
139+
pub fn popup_ui(&mut self, ui: &mut egui::Ui, column_name: &str) -> FilterUiAction {
148140
super::basic_operation_ui(ui, column_name, "is");
149141

150142
let mut clicked = false;
@@ -160,7 +152,9 @@ impl NullableBooleanFilter {
160152
.clicked();
161153

162154
if clicked {
163-
*action = FilterUiAction::CommitStateToBlueprint;
155+
FilterUiAction::CommitStateToBlueprint
156+
} else {
157+
FilterUiAction::None
164158
}
165159
}
166160
}

crates/viewer/re_dataframe_ui/src/filters/filter.rs

Lines changed: 13 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,14 @@ use datafusion::logical_expr::{
1212
ArrayFunctionArgument, ArrayFunctionSignature, ColumnarValue, ScalarFunctionArgs, ScalarUDF,
1313
ScalarUDFImpl, Signature, TypeSignature, Volatility,
1414
};
15-
use datafusion::prelude::{Column, Expr, array_to_string, col, contains, lit, lower};
15+
use datafusion::prelude::{Column, Expr, col};
1616

1717
use re_types_core::datatypes::TimeInt;
1818
use re_types_core::{Component as _, FIELD_METADATA_KEY_COMPONENT_TYPE, Loggable as _};
1919

2020
use super::{
21-
FloatFilter, IntFilter, NonNullableBooleanFilter, NullableBooleanFilter, TimestampFilter,
21+
FloatFilter, IntFilter, NonNullableBooleanFilter, NullableBooleanFilter, StringFilter,
22+
TimestampFilter, is_supported_string_datatype,
2223
};
2324

2425
/// The nullability of a nested arrow datatype.
@@ -92,14 +93,14 @@ pub enum FilterError {
9293
#[error("column {0} was not found")]
9394
ColumnNotFound(Column),
9495

95-
#[error("invalid filter kind {0:?} for field {1}")]
96-
InvalidFilterKind(Box<FilterKind>, Box<Field>),
97-
9896
#[error("invalid non-nullable boolean filter {0:?} for field {1}")]
9997
InvalidNonNullableBooleanFilter(NonNullableBooleanFilter, Box<Field>),
10098

10199
#[error("invalid nullable boolean filter {0:?} for field {1}")]
102100
InvalidNullableBooleanFilter(NullableBooleanFilter, Box<Field>),
101+
102+
#[error("invalid string filter {0:?} for field {1}")]
103+
InvalidStringFilter(StringFilter, Box<Field>),
103104
}
104105

105106
/// A filter applied to a table.
@@ -137,10 +138,7 @@ pub enum FilterKind {
137138
NonNullableBoolean(NonNullableBooleanFilter),
138139
Int(IntFilter),
139140
Float(FloatFilter),
140-
141-
//TODO(ab): parameterise that over multiple string ops, e.g. "contains", "starts with", etc.
142-
StringContains(String),
143-
141+
String(StringFilter),
144142
Timestamp(TimestampFilter),
145143
}
146144

@@ -194,9 +192,11 @@ impl FilterKind {
194192
Some(Self::Float(Default::default()))
195193
}
196194

197-
DataType::Utf8 | DataType::Utf8View => Some(Self::StringContains(String::new())),
195+
data_type if is_supported_string_datatype(data_type) => {
196+
Some(Self::String(Default::default()))
197+
}
198198

199-
DataType::Timestamp(_, _) => Some(Self::Timestamp(TimestampFilter::default())),
199+
DataType::Timestamp(_, _) => Some(Self::Timestamp(Default::default())),
200200

201201
_ => None,
202202
}
@@ -225,35 +225,7 @@ impl FilterKind {
225225
Ok(udf.call(vec![col(column.clone())]))
226226
}
227227

228-
Self::StringContains(query_string) => {
229-
if query_string.is_empty() {
230-
return Ok(lit(true));
231-
}
232-
233-
let operand = match field.data_type() {
234-
DataType::Utf8 | DataType::Utf8View => col(column.clone()),
235-
236-
DataType::List(field) | DataType::ListView(field)
237-
if field.data_type() == &DataType::Utf8
238-
|| field.data_type() == &DataType::Utf8View =>
239-
{
240-
// For List[Utf8], we concatenate all the instances into a single logical
241-
// string, separated by a Record Separator (0x1E) character. This ensures
242-
// that the query string doesn't accidentally match a substring spanning
243-
// multiple instances.
244-
array_to_string(col(column.clone()), lit("\u{001E}"))
245-
}
246-
247-
_ => {
248-
return Err(FilterError::InvalidFilterKind(
249-
self.clone().into(),
250-
field.clone().into(),
251-
));
252-
}
253-
};
254-
255-
Ok(contains(lower(operand), lower(lit(query_string))))
256-
}
228+
Self::String(string_filter) => Ok(string_filter.as_filter_expression(column)),
257229
}
258230
}
259231
}
@@ -442,6 +414,7 @@ impl FilterKindUdf {
442414
// 1) Convert the list array to a bool array (with same offsets and nulls)
443415
// 2) Apply the ANY (or, in the future, another) semantics to "merge" each row's instances
444416
// into the final bool.
417+
// TODO(ab): duplicated code with the other UDF, pliz unify.
445418
cast_list_array
446419
.iter()
447420
.map(|maybe_row| {

crates/viewer/re_dataframe_ui/src/filters/filter_ui.rs

Lines changed: 48 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,7 @@ impl Filter {
315315

316316
impl SyntaxHighlighting for TimestampFormatted<'_, FilterKind> {
317317
fn syntax_highlight_into(&self, builder: &mut SyntaxHighlightedBuilder) {
318+
//TODO(ab): this is weird. this entire impl should be delegated to inner structs
318319
builder.append_keyword(&self.inner.operator_text());
319320
builder.append_keyword(" ");
320321

@@ -335,8 +336,8 @@ impl SyntaxHighlighting for TimestampFormatted<'_, FilterKind> {
335336
builder.append(float_filter);
336337
}
337338

338-
FilterKind::StringContains(query) => {
339-
builder.append_string_value(query);
339+
FilterKind::String(string_filter) => {
340+
builder.append(string_filter);
340341
}
341342

342343
FilterKind::Timestamp(timestamp_filter) => {
@@ -364,51 +365,22 @@ impl FilterKind {
364365
column_name: &str,
365366
popup_just_opened: bool,
366367
) -> FilterUiAction {
367-
let mut action = FilterUiAction::None;
368-
369-
let operator_text = self.operator_text();
370-
371368
// Reduce the default width unnecessarily expands the popup width (queries as usually vers
372369
// small).
373370
ui.spacing_mut().text_edit_width = 150.0;
374371

375-
// TODO(ab): this is getting unwieldy. All arms should have an independent inner struct,
376-
// which all handle their own UI.
377372
match self {
378-
Self::NonNullableBoolean(boolean_filter) => {
379-
boolean_filter.popup_ui(ui, column_name, &mut action);
380-
}
381-
382-
Self::NullableBoolean(boolean_filter) => {
383-
boolean_filter.popup_ui(ui, column_name, &mut action);
384-
}
385-
386-
Self::Int(int_filter) => {
387-
action = int_filter.popup_ui(ui, column_name, popup_just_opened);
388-
}
389-
390-
Self::Float(float_filter) => {
391-
action = float_filter.popup_ui(ui, column_name, popup_just_opened);
392-
}
393-
394-
Self::StringContains(query) => {
395-
basic_operation_ui(ui, column_name, &operator_text);
396-
397-
let response = ui.text_edit_singleline(query);
398-
399-
if popup_just_opened {
400-
response.request_focus();
401-
}
402-
403-
action = action_from_text_edit_response(ui, &response);
373+
Self::NonNullableBoolean(boolean_filter) => boolean_filter.popup_ui(ui, column_name),
374+
Self::NullableBoolean(boolean_filter) => boolean_filter.popup_ui(ui, column_name),
375+
Self::Int(int_filter) => int_filter.popup_ui(ui, column_name, popup_just_opened),
376+
Self::Float(float_filter) => float_filter.popup_ui(ui, column_name, popup_just_opened),
377+
Self::String(string_filter) => {
378+
string_filter.popup_ui(ui, column_name, popup_just_opened)
404379
}
405-
406380
Self::Timestamp(timestamp_filter) => {
407-
timestamp_filter.popup_ui(ui, column_name, &mut action, timestamp_format);
381+
timestamp_filter.popup_ui(ui, column_name, timestamp_format)
408382
}
409383
}
410-
411-
action
412384
}
413385

414386
/// Given a chance to the underlying filter struct to update/clean itself upon committing the
@@ -422,7 +394,7 @@ impl FilterKind {
422394
| Self::NonNullableBoolean(_)
423395
| Self::Int(_)
424396
| Self::Float(_)
425-
| Self::StringContains(_) => {}
397+
| Self::String(_) => {}
426398

427399
Self::Timestamp(timestamp_filter) => timestamp_filter.on_commit(),
428400
}
@@ -433,7 +405,7 @@ impl FilterKind {
433405
match self {
434406
Self::Int(int_filter) => int_filter.comparison_operator().to_string(),
435407
Self::Float(float_filter) => float_filter.comparison_operator().to_string(),
436-
Self::StringContains(_) => "contains".to_owned(),
408+
Self::String(string_filter) => string_filter.operator().to_string(),
437409
Self::NonNullableBoolean(_) | Self::NullableBoolean(_) | Self::Timestamp(_) => {
438410
"is".to_owned()
439411
}
@@ -462,22 +434,23 @@ pub fn action_from_text_edit_response(ui: &egui::Ui, response: &egui::Response)
462434
mod tests {
463435
use super::super::{
464436
ComparisonOperator, FloatFilter, IntFilter, NonNullableBooleanFilter,
465-
NullableBooleanFilter, TimestampFilter,
437+
NullableBooleanFilter, StringFilter, TimestampFilter,
466438
};
467439
use super::*;
440+
use crate::filters::StringOperator;
468441

469442
fn test_cases() -> Vec<(FilterKind, &'static str)> {
470443
// Let's remember to update this test when adding new filter operations.
471444
#[cfg(debug_assertions)]
472445
let _: () = {
473446
use FilterKind::*;
474-
let _op = StringContains(String::new());
447+
let _op = String(Default::default());
475448
match _op {
476449
NonNullableBoolean(_)
477450
| NullableBoolean(_)
478451
| Int(_)
479452
| Float(_)
480-
| StringContains(_)
453+
| String(_)
481454
| Timestamp(_) => {}
482455
}
483456
};
@@ -520,13 +493,17 @@ mod tests {
520493
"float_compares_none",
521494
),
522495
(
523-
FilterKind::StringContains("query".to_owned()),
496+
FilterKind::String(StringFilter::new(StringOperator::Contains, "query")),
524497
"string_contains",
525498
),
526499
(
527-
FilterKind::StringContains(String::new()),
500+
FilterKind::String(StringFilter::new(StringOperator::Contains, "")),
528501
"string_contains_empty",
529502
),
503+
(
504+
FilterKind::String(StringFilter::new(StringOperator::StartsWith, "query")),
505+
"string_starts_with",
506+
),
530507
(
531508
FilterKind::Timestamp(TimestampFilter::after(
532509
jiff::Timestamp::from_millisecond(100_000_000_000).unwrap(),
@@ -607,18 +584,39 @@ mod tests {
607584
let filters = vec![
608585
Filter::new(
609586
"some:column:name",
610-
FilterKind::StringContains("some query string".to_owned()),
587+
FilterKind::String(StringFilter::new(
588+
StringOperator::Contains,
589+
"some query string".to_owned(),
590+
)),
611591
),
612592
Filter::new(
613593
"other:column:name",
614-
FilterKind::StringContains("hello".to_owned()),
594+
FilterKind::String(StringFilter::new(
595+
StringOperator::Contains,
596+
"hello".to_owned(),
597+
)),
598+
),
599+
Filter::new(
600+
"short:name",
601+
FilterKind::String(StringFilter::new(
602+
StringOperator::Contains,
603+
"world".to_owned(),
604+
)),
615605
),
616-
Filter::new("short:name", FilterKind::StringContains("world".to_owned())),
617606
Filter::new(
618607
"looooog:name",
619-
FilterKind::StringContains("some more querying text here".to_owned()),
608+
FilterKind::String(StringFilter::new(
609+
StringOperator::Contains,
610+
"some more querying text here".to_owned(),
611+
)),
612+
),
613+
Filter::new(
614+
"world",
615+
FilterKind::String(StringFilter::new(
616+
StringOperator::Contains,
617+
":wave:".to_owned(),
618+
)),
620619
),
621-
Filter::new("world", FilterKind::StringContains(":wave:".to_owned())),
622620
];
623621

624622
let mut filters = FilterState {

crates/viewer/re_dataframe_ui/src/filters/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@ mod filter;
33
mod filter_ui;
44
mod numerical;
55
mod parse_timestamp;
6+
mod string;
67
mod timestamp;
78
mod timestamp_formatted;
89

910
pub use self::{
10-
boolean::*, filter::*, filter_ui::*, numerical::*, parse_timestamp::*, timestamp::*,
11+
boolean::*, filter::*, filter_ui::*, numerical::*, parse_timestamp::*, string::*, timestamp::*,
1112
timestamp_formatted::*,
1213
};

0 commit comments

Comments
 (0)