Skip to content

Commit 7397282

Browse files
authored
Data Explorer: Implement SearchSchema RPC, refactor and improve data explorer test suite (#889)
Addresses posit-dev/positron#8805; follows the backend API change in posit-dev/positron#8810. Also pulls in variables comm changes, which can be implemented in a follow-up PR.
1 parent 8a51524 commit 7397282

File tree

5 files changed

+1201
-954
lines changed

5 files changed

+1201
-954
lines changed

crates/amalthea/src/comm/data_explorer_comm.rs

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,9 @@ pub struct OpenDatasetResult {
2121
/// Result in Methods
2222
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
2323
pub struct SearchSchemaResult {
24-
/// A schema containing matching columns up to the max_results limit
25-
pub matches: TableSchema,
26-
27-
/// The total number of columns matching the filter
28-
pub total_num_matches: i64
24+
/// The indices of the columns matching the filters, in the requested
25+
/// sort order
26+
pub matches: Vec<i64>
2927
}
3028

3129
/// Exported result
@@ -691,6 +689,22 @@ pub struct ColumnSelection {
691689
pub spec: ArraySelection
692690
}
693691

692+
/// Possible values for SortOrder in SearchSchema
///
/// Each variant serializes (serde) and displays (strum) as the lowercase
/// string named in its attributes.
693+
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, strum_macros::Display)]
694+
pub enum SearchSchemaSortOrder {
695+
/// Keep the matches in their original in-schema order
#[serde(rename = "original")]
696+
#[strum(to_string = "original")]
697+
Original,
698+
699+
/// Sort the matches alphabetically, ascending
#[serde(rename = "ascending")]
700+
#[strum(to_string = "ascending")]
701+
Ascending,
702+
703+
/// Sort the matches alphabetically, descending
#[serde(rename = "descending")]
704+
#[strum(to_string = "descending")]
705+
Descending
706+
}
707+
694708
/// Possible values for ColumnDisplayType
695709
#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, strum_macros::Display)]
696710
pub enum ColumnDisplayType {
@@ -1059,15 +1073,12 @@ pub struct GetSchemaParams {
10591073
/// Parameters for the SearchSchema method.
10601074
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
10611075
pub struct SearchSchemaParams {
1062-
/// Column filters to apply when searching
1076+
/// Column filters to apply when searching, can be empty
10631077
pub filters: Vec<ColumnFilter>,
10641078

1065-
/// Index (starting from zero) of first result to fetch (for paging)
1066-
pub start_index: i64,
1067-
1068-
/// Maximum number of resulting column schemas to fetch from the start
1069-
/// index
1070-
pub max_results: i64,
1079+
/// How to sort results: original in-schema order, alphabetical ascending
1080+
/// or descending
1081+
pub sort_order: SearchSchemaSortOrder,
10711082
}
10721083

10731084
/// Parameters for the GetDataValues method.
@@ -1181,10 +1192,9 @@ pub enum DataExplorerBackendRequest {
11811192
#[serde(rename = "get_schema")]
11821193
GetSchema(GetSchemaParams),
11831194

1184-
/// Search full, unfiltered table schema with column filters
1195+
/// Search table schema with column filters, optionally sort results
11851196
///
1186-
/// Search full, unfiltered table schema for column names matching one or
1187-
/// more column filters
1197+
/// Search table schema with column filters, optionally sort results
11881198
#[serde(rename = "search_schema")]
11891199
SearchSchema(SearchSchemaParams),
11901200

crates/amalthea/src/comm/variables_comm.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,22 @@ pub struct FormattedVariable {
4444
pub content: String
4545
}
4646

47+
/// Result of the summarize operation
///
/// Carried by `VariablesBackendReply::QueryTableSummaryReply` in answer to a
/// `query_table_summary` request.
48+
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
49+
pub struct QueryTableSummaryResult {
50+
/// The total number of rows in the table.
51+
pub num_rows: i64,
52+
53+
/// The total number of columns in the table.
54+
pub num_columns: i64,
55+
56+
/// The column schemas in the table.
///
/// NOTE(review): each entry is an opaque string at this level; presumably
/// a serialized schema - confirm the encoding against the comm contract.
57+
pub column_schemas: Vec<String>,
58+
59+
/// The column profiles in the table.
///
/// NOTE(review): each entry is an opaque string at this level; presumably
/// a serialized profile - confirm the encoding against the comm contract.
60+
pub column_profiles: Vec<String>
61+
}
62+
4763
/// A single variable in the runtime.
4864
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
4965
pub struct Variable {
@@ -196,6 +212,16 @@ pub struct ViewParams {
196212
pub path: Vec<String>,
197213
}
198214

215+
/// Parameters for the QueryTableSummary method.
///
/// This is the payload of `VariablesBackendRequest::QueryTableSummary`, whose
/// wire name is `query_table_summary`.
216+
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
217+
pub struct QueryTableSummaryParams {
218+
/// The path to the table to summarize, as an array of access keys.
219+
pub path: Vec<String>,
220+
221+
/// A list of query types.
///
/// NOTE(review): the set of accepted query type strings is not defined
/// here - confirm against the comm contract.
222+
pub query_types: Vec<String>,
223+
}
224+
199225
/// Parameters for the Update method.
200226
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
201227
pub struct UpdateParams {
@@ -271,6 +297,12 @@ pub enum VariablesBackendRequest {
271297
#[serde(rename = "view")]
272298
View(ViewParams),
273299

300+
/// Query table summary
301+
///
302+
/// Request a data summary for a table variable.
303+
#[serde(rename = "query_table_summary")]
304+
QueryTableSummary(QueryTableSummaryParams),
305+
274306
}
275307

276308
/**
@@ -297,6 +329,9 @@ pub enum VariablesBackendReply {
297329
/// The ID of the viewer that was opened.
298330
ViewReply(Option<String>),
299331

332+
/// Result of the summarize operation
333+
QueryTableSummaryReply(QueryTableSummaryResult),
334+
300335
}
301336

302337
/**

crates/ark/src/data_explorer/r_data_explorer.rs

Lines changed: 114 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ use amalthea::comm::data_explorer_comm::BackendState;
1414
use amalthea::comm::data_explorer_comm::CodeSyntaxName;
1515
use amalthea::comm::data_explorer_comm::ColumnDisplayType;
1616
use amalthea::comm::data_explorer_comm::ColumnFilter;
17+
use amalthea::comm::data_explorer_comm::ColumnFilterParams;
18+
use amalthea::comm::data_explorer_comm::ColumnFilterType;
19+
use amalthea::comm::data_explorer_comm::ColumnFilterTypeSupportStatus;
1720
use amalthea::comm::data_explorer_comm::ColumnProfileType;
1821
use amalthea::comm::data_explorer_comm::ColumnProfileTypeSupportStatus;
1922
use amalthea::comm::data_explorer_comm::ColumnSchema;
@@ -53,6 +56,7 @@ use amalthea::comm::data_explorer_comm::TableRowLabels;
5356
use amalthea::comm::data_explorer_comm::TableSchema;
5457
use amalthea::comm::data_explorer_comm::TableSelection;
5558
use amalthea::comm::data_explorer_comm::TableShape;
59+
use amalthea::comm::data_explorer_comm::TextSearchType;
5660
use amalthea::comm::event::CommManagerEvent;
5761
use amalthea::socket::comm::CommInitiator;
5862
use amalthea::socket::comm::CommSocket;
@@ -532,9 +536,7 @@ impl RDataExplorer {
532536
return Err(anyhow!("Data Explorer: Not yet supported"));
533537
},
534538

535-
DataExplorerBackendRequest::SearchSchema(_) => {
536-
return Err(anyhow!("Data Explorer: Not yet supported"));
537-
},
539+
DataExplorerBackendRequest::SearchSchema(params) => self.search_schema(params),
538540

539541
DataExplorerBackendRequest::SetColumnFilters(_) => {
540542
return Err(anyhow!("Data Explorer: Not yet supported"));
@@ -859,6 +861,104 @@ impl RDataExplorer {
859861
self.view_indices = Some(view_indices);
860862
}
861863

864+
/// Search the schema for columns matching the given filters and sort order.
865+
///
866+
/// - `params`: The search parameters including filters and sort order.
867+
fn search_schema(
868+
&self,
869+
params: amalthea::comm::data_explorer_comm::SearchSchemaParams,
870+
) -> anyhow::Result<DataExplorerBackendReply> {
871+
let all_columns = &self.shape.columns;
872+
873+
// Apply column filters to find matching columns using iterator chaining
874+
let mut matching_indices: Vec<i64> = all_columns
875+
.iter()
876+
.enumerate()
877+
.filter_map(|(index, column)| {
878+
let column_index = index as i64;
879+
880+
// Check if column matches all filters
881+
let matches = params
882+
.filters
883+
.iter()
884+
.all(|filter| self.column_matches_filter(column, filter));
885+
886+
if matches {
887+
Some(column_index)
888+
} else {
889+
None
890+
}
891+
})
892+
.collect();
893+
894+
// Apply sort order
895+
match params.sort_order {
896+
amalthea::comm::data_explorer_comm::SearchSchemaSortOrder::Original => {
897+
// matching_indices is already in original order
898+
},
899+
order => {
900+
let ascending = matches!(
901+
order,
902+
amalthea::comm::data_explorer_comm::SearchSchemaSortOrder::Ascending
903+
);
904+
matching_indices.sort_by(|&a, &b| {
905+
let ord = all_columns[a as usize]
906+
.column_name
907+
.cmp(&all_columns[b as usize].column_name);
908+
if ascending { ord } else { ord.reverse() }
909+
});
910+
}
911+
}
912+
913+
Ok(DataExplorerBackendReply::SearchSchemaReply(
914+
amalthea::comm::data_explorer_comm::SearchSchemaResult {
915+
matches: matching_indices,
916+
},
917+
))
918+
}
919+
920+
/// Check if a column matches a given column filter.
921+
fn column_matches_filter(&self, column: &ColumnSchema, filter: &ColumnFilter) -> bool {
922+
match filter.filter_type {
923+
ColumnFilterType::TextSearch => {
924+
if let ColumnFilterParams::TextSearch(text_search) = &filter.params {
925+
let column_name = if text_search.case_sensitive {
926+
column.column_name.to_owned()
927+
} else {
928+
column.column_name.to_lowercase()
929+
};
930+
931+
let search_term = if text_search.case_sensitive {
932+
text_search.term.to_owned()
933+
} else {
934+
text_search.term.to_lowercase()
935+
};
936+
937+
match text_search.search_type {
938+
TextSearchType::Contains => column_name.contains(&search_term),
939+
TextSearchType::NotContains => !column_name.contains(&search_term),
940+
TextSearchType::StartsWith => column_name.starts_with(&search_term),
941+
TextSearchType::EndsWith => column_name.ends_with(&search_term),
942+
TextSearchType::RegexMatch => {
943+
// For regex matching, we use simple string matching as a fallback
944+
// A full regex implementation would require additional dependencies
945+
column_name.contains(&search_term)
946+
},
947+
}
948+
} else {
949+
false
950+
}
951+
},
952+
ColumnFilterType::MatchDataTypes => {
953+
if let ColumnFilterParams::MatchDataTypes(type_filter) = &filter.params {
954+
type_filter.display_types.contains(&column.type_display)
955+
} else {
956+
false
957+
}
958+
},
959+
}
960+
}
961+
862962
/// Get the schema for a vector of columns in the data object.
863963
///
864964
/// - `column_indices`: The vector of columns in the data object.
@@ -950,8 +1050,17 @@ impl RDataExplorer {
9501050
],
9511051
},
9521052
search_schema: SearchSchemaFeatures {
953-
support_status: SupportStatus::Unsupported,
954-
supported_types: vec![],
1053+
support_status: SupportStatus::Supported,
1054+
supported_types: vec![
1055+
ColumnFilterTypeSupportStatus {
1056+
column_filter_type: ColumnFilterType::TextSearch,
1057+
support_status: SupportStatus::Supported,
1058+
},
1059+
ColumnFilterTypeSupportStatus {
1060+
column_filter_type: ColumnFilterType::MatchDataTypes,
1061+
support_status: SupportStatus::Supported,
1062+
},
1063+
],
9551064
},
9561065
set_row_filters: SetRowFiltersFeatures {
9571066
support_status: SupportStatus::Supported,

crates/ark/src/variables/r_variables.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use amalthea::comm::variables_comm::VariablesBackendReply;
1818
use amalthea::comm::variables_comm::VariablesBackendRequest;
1919
use amalthea::comm::variables_comm::VariablesFrontendEvent;
2020
use amalthea::socket::comm::CommSocket;
21+
use anyhow::anyhow;
2122
use crossbeam::channel::select;
2223
use crossbeam::channel::unbounded;
2324
use crossbeam::channel::Sender;
@@ -294,6 +295,9 @@ impl RVariables {
294295
let viewer_id = self.view(&params.path)?;
295296
Ok(VariablesBackendReply::ViewReply(viewer_id))
296297
},
298+
VariablesBackendRequest::QueryTableSummary(_) => {
299+
return Err(anyhow!("Variables: QueryTableSummary not yet supported"));
300+
},
297301
}
298302
}
299303

0 commit comments

Comments
 (0)