Skip to content

Commit 455152a

Browse files
authored
Add support for table summary tool (#904)
1 parent 7daf59e commit 455152a

File tree

4 files changed

+357
-19
lines changed

4 files changed

+357
-19
lines changed

crates/ark/src/data_explorer/r_data_explorer.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ pub struct DataObjectEnvInfo {
112112
pub env: RThreadSafe<RObject>,
113113
}
114114

115-
struct DataObjectShape {
115+
pub(crate) struct DataObjectShape {
116116
pub columns: Vec<ColumnSchema>,
117117
pub num_rows: i32,
118118
pub kind: TableKind,
@@ -580,7 +580,7 @@ impl RDataExplorer {
580580

581581
// Methods that must be run on the main R thread
582582
impl RDataExplorer {
583-
fn r_get_shape(table: RObject) -> anyhow::Result<DataObjectShape> {
583+
pub(crate) fn r_get_shape(table: RObject) -> anyhow::Result<DataObjectShape> {
584584
unsafe {
585585
let table = table.clone();
586586

crates/ark/src/variables/r_variables.rs

Lines changed: 103 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use amalthea::comm::event::CommManagerEvent;
1010
use amalthea::comm::variables_comm::ClipboardFormatFormat;
1111
use amalthea::comm::variables_comm::FormattedVariable;
1212
use amalthea::comm::variables_comm::InspectedVariable;
13+
use amalthea::comm::variables_comm::QueryTableSummaryResult;
1314
use amalthea::comm::variables_comm::RefreshParams;
1415
use amalthea::comm::variables_comm::UpdateParams;
1516
use amalthea::comm::variables_comm::Variable;
@@ -40,6 +41,7 @@ use stdext::spawn;
4041

4142
use crate::data_explorer::r_data_explorer::DataObjectEnvInfo;
4243
use crate::data_explorer::r_data_explorer::RDataExplorer;
44+
use crate::data_explorer::summary_stats::summary_stats;
4345
use crate::lsp::events::EVENTS;
4446
use crate::r_task;
4547
use crate::thread::RThreadSafe;
@@ -119,7 +121,7 @@ impl RVariables {
119121
// Validate that the RObject we were passed is actually an environment
120122
if let Err(err) = r_assert_type(env.sexp, &[ENVSXP]) {
121123
log::warn!(
122-
"Environment: Attempt to monitor or list non-environment object {env:?} ({err:?})"
124+
"Variables: Attempt to monitor or list non-environment object {env:?} ({err:?})"
123125
);
124126
}
125127

@@ -191,17 +193,17 @@ impl RVariables {
191193
// appropriate. Retrying is likely to just lead to a busy
192194
// loop.
193195
log::error!(
194-
"Environment: Error receiving message from frontend: {err:?}"
196+
"Variables: Error receiving message from frontend: {err:?}"
195197
);
196198

197199
break;
198200
},
199201
};
200-
log::info!("Environment: Received message from frontend: {msg:?}");
202+
log::info!("Variables: Received message from frontend: {msg:?}");
201203

202204
// Break out of the loop if the frontend has closed the channel
203205
if let CommMsg::Close = msg {
204-
log::info!("Environment: Closing down after receiving comm_close from frontend.");
206+
log::info!("Variables: Closing down after receiving comm_close from frontend.");
205207

206208
// Remember that the user initiated the close so that we can
207209
// avoid sending a duplicate close message from the back end
@@ -295,8 +297,9 @@ impl RVariables {
295297
let viewer_id = self.view(&params.path)?;
296298
Ok(VariablesBackendReply::ViewReply(viewer_id))
297299
},
298-
VariablesBackendRequest::QueryTableSummary(_) => {
299-
return Err(anyhow!("Variables: QueryTableSummary not yet supported"));
300+
VariablesBackendRequest::QueryTableSummary(params) => {
301+
let result = self.query_table_summary(&params.path, &params.query_types)?;
302+
Ok(VariablesBackendReply::QueryTableSummaryReply(result))
300303
},
301304
}
302305
}
@@ -400,6 +403,98 @@ impl RVariables {
400403
})
401404
}
402405

406+
/// Query table summary for the given variable.
407+
///
408+
/// - `path`: The path to the variable to summarize, as an array of access keys
409+
/// - `query_types`: A list of query types (e.g. "summary_stats")
410+
///
411+
/// Returns summary information about the table including schemas and profiles.
412+
fn query_table_summary(
413+
&mut self,
414+
path: &Vec<String>,
415+
query_types: &Vec<String>,
416+
) -> anyhow::Result<QueryTableSummaryResult> {
417+
r_task(|| {
418+
let env = self.env.get().clone();
419+
let table = PositronVariable::resolve_data_object(env, &path)?;
420+
421+
let kind = if harp::utils::r_is_data_frame(table.sexp) {
422+
harp::TableKind::Dataframe
423+
} else if harp::utils::r_is_matrix(table.sexp) {
424+
harp::TableKind::Matrix
425+
} else {
426+
return Err(anyhow!(
427+
"Object is not a supported table type (data.frame or matrix)"
428+
));
429+
};
430+
431+
let num_cols = match kind {
432+
harp::TableKind::Dataframe => {
433+
let ncol = harp::DataFrame::n_col(table.sexp)?;
434+
ncol as i64
435+
},
436+
harp::TableKind::Matrix => {
437+
let (_nrow, ncol) = harp::Matrix::dim(table.sexp)?;
438+
ncol as i64
439+
},
440+
};
441+
442+
let shapes = RDataExplorer::r_get_shape(table.clone())?;
443+
444+
let column_schemas: Vec<String> = shapes
445+
.columns
446+
.iter()
447+
.map(|schema| serde_json::to_string(schema))
448+
.collect::<Result<Vec<_>, _>>()?;
449+
450+
let mut column_profiles: Vec<String> = vec![];
451+
452+
if query_types.contains(&"summary_stats".to_string()) {
453+
let profiles: Vec<String> = shapes
454+
.columns
455+
.iter()
456+
.enumerate()
457+
.map(|(i, schema)| -> anyhow::Result<String> {
458+
let column = harp::tbl_get_column(table.sexp, i as i32, kind)?;
459+
460+
let format_options = amalthea::comm::data_explorer_comm::FormatOptions {
461+
large_num_digits: 4,
462+
small_num_digits: 6,
463+
max_integral_digits: 7,
464+
max_value_length: 1000,
465+
thousands_sep: None,
466+
};
467+
468+
let summary_stats =
469+
summary_stats(column.sexp, schema.type_display, &format_options).map(
470+
|stats| {
471+
serde_json::to_value(stats).unwrap_or(serde_json::Value::Null)
472+
},
473+
)?;
474+
475+
let profile = serde_json::json!({
476+
"column_name": schema.column_name,
477+
"type_display": format!("{:?}", schema.type_display).to_lowercase(),
478+
"summary_stats": summary_stats,
479+
})
480+
.to_string();
481+
482+
Ok(profile)
483+
})
484+
.collect::<anyhow::Result<Vec<String>>>()?;
485+
486+
column_profiles.extend(profiles);
487+
}
488+
489+
Ok(QueryTableSummaryResult {
490+
num_rows: shapes.num_rows as i64,
491+
num_columns: num_cols,
492+
column_schemas,
493+
column_profiles,
494+
})
495+
})
496+
}
497+
403498
fn send_event(&mut self, message: VariablesFrontendEvent, request_id: Option<String>) {
404499
let data = serde_json::to_value(message);
405500

@@ -415,7 +510,7 @@ impl RVariables {
415510
self.comm.outgoing_tx.send(comm_msg).unwrap()
416511
},
417512
Err(err) => {
418-
log::error!("Environment: Failed to serialize environment data: {err}");
513+
log::error!("Variables: Failed to serialize environment data: {err}");
419514
},
420515
}
421516
}
@@ -449,7 +544,7 @@ impl RVariables {
449544
Err(err) => {
450545
// This isn't a critical error but would also be very
451546
// unexpected.
452-
log::error!("Environment: Could not evaluate .Last.value ({err:?})");
547+
log::error!("Variables: Could not evaluate .Last.value ({err:?})");
453548
None
454549
},
455550
}

0 commit comments

Comments
 (0)