Skip to content

Commit 78fe9aa

Browse files
authored
Let duckplyr register Ark methods, and avoid unnecessary materialization (#745)
* Only force ALTREP compact row names (i.e. from duckplyr) if requested * Rip out specific ALTREP support in favor of packages registering ark hooks * Note our care around `variable_length()` * Use `table.sexp` * Fix thinko * Bless duckplyr * Remove duplicate check done by `r_is_data_frame()` * Move nrow/ncol into static methods So they are at least tied to the `DataFrame` struct * Turn `mat_dim()` into `Matrix::dim()` * Add `get_attribute()` and friends to `RObject` Remove footgun of `r_names()` which didn't protect
1 parent d3e7f66 commit 78fe9aa

File tree

14 files changed

+357
-302
lines changed

14 files changed

+357
-302
lines changed

crates/ark/src/data_explorer/r_data_explorer.rs

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,9 @@ use harp::exec::RFunction;
6262
use harp::exec::RFunctionExt;
6363
use harp::object::RObject;
6464
use harp::r_symbol;
65+
use harp::table_kind;
6566
use harp::tbl_get_column;
66-
use harp::TableInfo;
67+
use harp::ColumnNames;
6768
use harp::TableKind;
6869
use itertools::Itertools;
6970
use libr::*;
@@ -562,22 +563,27 @@ impl RDataExplorer {
562563
fn r_get_shape(table: RObject) -> anyhow::Result<DataObjectShape> {
563564
unsafe {
564565
let table = table.clone();
565-
let object = *table;
566566

567-
let info = table_info_or_bail(object)?;
567+
let Some(kind) = table_kind(table.sexp) else {
568+
return Err(anyhow!("Unsupported type for the data viewer"));
569+
};
568570

569-
let harp::TableInfo {
570-
kind,
571-
dims:
572-
harp::TableDim {
573-
num_rows,
574-
num_cols: total_num_columns,
575-
},
576-
col_names: column_names,
577-
} = info;
571+
// `DataFrame::n_row()` will materialize duckplyr compact row names, but we
572+
// are ok with that for the data explorer and don't provide a hook to opt out.
573+
let (n_row, n_col, column_names) = match kind {
574+
TableKind::Dataframe => (
575+
harp::DataFrame::n_row(table.sexp)?,
576+
harp::DataFrame::n_col(table.sexp)?,
577+
ColumnNames::from_data_frame(table.sexp)?,
578+
),
579+
TableKind::Matrix => {
580+
let (n_row, n_col) = harp::Matrix::dim(table.sexp)?;
581+
(n_row, n_col, ColumnNames::from_matrix(table.sexp)?)
582+
},
583+
};
578584

579585
let mut column_schemas = Vec::<ColumnSchema>::new();
580-
for i in 0..(total_num_columns as isize) {
586+
for i in 0..(n_col as isize) {
581587
let column_name = match column_names.get_unchecked(i) {
582588
Some(name) => name,
583589
None => String::from(""),
@@ -586,8 +592,8 @@ impl RDataExplorer {
586592
// TODO: handling for nested data frame columns
587593

588594
let col = match kind {
589-
harp::TableKind::Dataframe => VECTOR_ELT(object, i),
590-
harp::TableKind::Matrix => object,
595+
harp::TableKind::Dataframe => VECTOR_ELT(table.sexp, i),
596+
harp::TableKind::Matrix => table.sexp,
591597
};
592598

593599
let type_name = WorkspaceVariableDisplayType::from(col, false).display_type;
@@ -610,7 +616,7 @@ impl RDataExplorer {
610616
Ok(DataObjectShape {
611617
columns: column_schemas,
612618
kind,
613-
num_rows,
619+
num_rows: n_row,
614620
})
615621
}
616622
}
@@ -1073,10 +1079,6 @@ impl RDataExplorer {
10731079
}
10741080
}
10751081

1076-
fn table_info_or_bail(x: SEXP) -> anyhow::Result<TableInfo> {
1077-
harp::table_info(x).ok_or(anyhow!("Unsupported type for data viewer"))
1078-
}
1079-
10801082
/// Open an R object in the data viewer.
10811083
///
10821084
/// This function is called from the R side to open an R object in the data viewer.

crates/ark/src/modules/positron/methods.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ ark_methods_table$ark_positron_variable_get_children <- new.env(
2424
)
2525
lockEnvironment(ark_methods_table, TRUE)
2626

27-
ark_methods_allowed_packages <- c("torch", "reticulate")
27+
ark_methods_allowed_packages <- c("torch", "reticulate", "duckplyr")
2828

2929
# check if the calling package is allowed to touch the methods table
3030
check_caller_allowed <- function() {
@@ -77,7 +77,9 @@ check_register_args <- function(generic, class) {
7777
check_register_args(generic, class)
7878

7979
for (cls in class) {
80-
if (exists(cls, envir = ark_methods_table[[generic]], inherits = FALSE)) {
80+
if (
81+
exists(cls, envir = ark_methods_table[[generic]], inherits = FALSE)
82+
) {
8183
remove(list = cls, envir = ark_methods_table[[generic]])
8284
}
8385
}

crates/ark/src/srcref.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ fn generate_source(
139139
}
140140

141141
// Ignore functions that already have sources
142-
if let Some(_) = old.attr("srcref") {
142+
if let Some(_) = old.get_attribute("srcref") {
143143
return Ok(None);
144144
}
145145

crates/ark/src/variables/variable.rs

Lines changed: 53 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ use harp::object::RObject;
2525
use harp::r_null;
2626
use harp::r_symbol;
2727
use harp::symbol::RSymbol;
28-
use harp::table_info;
2928
use harp::utils::pairlist_size;
3029
use harp::utils::r_altrep_class;
3130
use harp::utils::r_assert_type;
@@ -51,7 +50,7 @@ use harp::vector::CharacterVector;
5150
use harp::vector::IntegerVector;
5251
use harp::vector::Vector;
5352
use harp::List;
54-
use harp::TableDim;
53+
use harp::TableKind;
5554
use itertools::Itertools;
5655
use libr::*;
5756
use stdext::local;
@@ -90,7 +89,7 @@ impl WorkspaceVariableDisplayValue {
9089

9190
let out = match r_typeof(value) {
9291
NILSXP => Self::new(String::from("NULL"), false),
93-
VECSXP if r_inherits(value, "data.frame") => Self::from_data_frame(value),
92+
VECSXP if r_is_data_frame(value) => Self::from_data_frame(value)?,
9493
VECSXP if !r_inherits(value, "POSIXlt") => Self::from_list(value),
9594
LISTSXP => Self::empty(),
9695
SYMSXP if value == unsafe { R_MissingArg } => {
@@ -157,15 +156,17 @@ impl WorkspaceVariableDisplayValue {
157156
Ok(Self::new(display_value, truncated))
158157
}
159158

160-
fn from_data_frame(value: SEXP) -> Self {
161-
let dim = match unsafe { harp::df_dim(value) } {
162-
Ok(dim) => dim,
163-
// FIXME: Needs more type safety
164-
Err(_) => TableDim {
165-
num_rows: -1,
166-
num_cols: -1,
167-
},
168-
};
159+
fn from_data_frame(value: SEXP) -> anyhow::Result<Self> {
160+
// Classes should provide an `ark_positron_variable_display_value()` method
161+
// if they need to opt out of ALTREP materialization here.
162+
let n_row = harp::DataFrame::n_row(value)?;
163+
let n_col = harp::DataFrame::n_col(value)?;
164+
165+
let row_word = plural("row", n_row);
166+
let col_word = plural("column", n_col);
167+
168+
let n_row = n_row.to_string();
169+
let n_col = n_col.to_string();
169170

170171
let class = match r_classes(value) {
171172
None => String::from(""),
@@ -175,15 +176,9 @@ impl WorkspaceVariableDisplayValue {
175176
},
176177
};
177178

178-
let value = format!(
179-
"[{} {} x {} {}]{}",
180-
dim.num_rows,
181-
plural("row", dim.num_rows),
182-
dim.num_cols,
183-
plural("column", dim.num_cols),
184-
class
185-
);
186-
Self::new(value, false)
179+
let value = format!("[{n_row} {row_word} x {n_col} {col_word}]{class}");
180+
181+
Ok(Self::new(value, false))
187182
}
188183

189184
fn from_list(value: SEXP) -> Self {
@@ -305,13 +300,7 @@ impl WorkspaceVariableDisplayValue {
305300
}
306301

307302
fn from_matrix(value: SEXP) -> anyhow::Result<Self> {
308-
let (n_row, n_col) = match harp::table_info(value) {
309-
Some(info) => (info.dims.num_rows, info.dims.num_cols),
310-
None => {
311-
log::error!("Failed to get matrix dimensions");
312-
(-1, -1)
313-
},
314-
};
303+
let (n_row, n_col) = harp::Matrix::dim(value)?;
315304

316305
let class = match r_classes(value) {
317306
None => String::from(" <matrix>"),
@@ -496,13 +485,23 @@ impl WorkspaceVariableDisplayType {
496485
let dfclass = classes.get_unchecked(0).unwrap();
497486
match include_length {
498487
true => {
499-
let dim = table_info(value);
500-
let shape = match dim {
501-
Some(info) => {
502-
format!("{}, {}", info.dims.num_rows, info.dims.num_cols)
488+
// Classes should provide an `ark_positron_variable_display_type()` method
489+
// if they need to opt out of ALTREP materialization here.
490+
let n_row: String = match harp::DataFrame::n_row(value) {
491+
Ok(n_row) => n_row.to_string(),
492+
Err(error) => {
493+
log::error!("Can't compute number of rows: {error}");
494+
String::from("?")
503495
},
504-
None => String::from("?, ?"),
505496
};
497+
let n_col = match harp::DataFrame::n_col(value) {
498+
Ok(n_col) => n_col.to_string(),
499+
Err(error) => {
500+
log::error!("Can't compute number of columns: {error}");
501+
String::from("?")
502+
},
503+
};
504+
let shape = format!("{n_row}, {n_col}");
506505
let display_type = format!("{} [{}]", dfclass, shape);
507506
Self::simple(display_type)
508507
},
@@ -731,8 +730,26 @@ impl PositronVariable {
731730

732731
fn variable_length(x: SEXP) -> usize {
733732
// Check for tabular data
734-
if let Some(info) = harp::table_info(x) {
735-
return info.dims.num_cols as usize;
733+
// We don't currently provide an ark hook for variable length, so we are somewhat
734+
// careful to only query n-col and never n-row for data frames, to avoid duckplyr
735+
// query materialization.
736+
if let Some(kind) = harp::table_kind(x) {
737+
return match kind {
738+
TableKind::Dataframe => match harp::DataFrame::n_col(x) {
739+
Ok(n_col) => n_col as usize,
740+
Err(error) => {
741+
log::error!("Can't compute number of data frame columns: {error}");
742+
0
743+
},
744+
},
745+
TableKind::Matrix => match harp::Matrix::dim(x) {
746+
Ok((_n_row, n_col)) => n_col as usize,
747+
Err(error) => {
748+
log::error!("Can't compute matrix dimensions: {error}");
749+
0
750+
},
751+
},
752+
};
736753
}
737754

738755
// Otherwise treat as vector
@@ -1201,14 +1218,7 @@ impl PositronVariable {
12011218

12021219
fn inspect_matrix(matrix: SEXP) -> anyhow::Result<Vec<Variable>> {
12031220
let matrix = RObject::new(matrix);
1204-
1205-
let n_col = match harp::table_info(matrix.sexp) {
1206-
Some(info) => info.dims.num_cols,
1207-
None => {
1208-
log::warn!("Unexpected matrix object. Couldn't get dimensions.");
1209-
return Ok(vec![]);
1210-
},
1211-
};
1221+
let (_n_row, n_col) = harp::Matrix::dim(matrix.sexp)?;
12121222

12131223
let make_variable = |access_key, display_name, display_value, is_truncated| Variable {
12141224
access_key,

crates/harp/src/attrib.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ fn zap_srcref_fn(x: SEXP) -> RObject {
2020
unsafe {
2121
let x = RObject::view(x).shallow_duplicate();
2222

23-
x.set_attr("srcref", r_null());
23+
x.set_attribute("srcref", r_null());
2424
libr::SET_BODY(x.sexp, zap_srcref(libr::R_ClosureExpr(x.sexp)).sexp);
2525

2626
x
@@ -64,7 +64,7 @@ fn zap_srcref_expr(x: SEXP) -> RObject {
6464

6565
fn zap_srcref_attrib(x: SEXP) {
6666
let x = RObject::view(x);
67-
x.set_attr("srcfile", r_null());
68-
x.set_attr("srcref", r_null());
69-
x.set_attr("wholeSrcref", r_null());
67+
x.set_attribute("srcfile", r_null());
68+
x.set_attribute("srcref", r_null());
69+
x.set_attribute("wholeSrcref", r_null());
7070
}

crates/harp/src/column_names.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
use libr::*;
2+
3+
use crate::exec::RFunction;
4+
use crate::exec::RFunctionExt;
5+
use crate::utils::*;
6+
use crate::vector::Vector;
7+
use crate::CharacterVector;
8+
use crate::RObject;
9+
10+
/// Column names
11+
///
12+
/// Column names represent an optional character vector of names. This class is mostly
13+
/// useful for ergonomics, since [ColumnNames::get_unchecked()] will propagate [None]
14+
/// when used on a vector without column names.
15+
pub struct ColumnNames {
16+
names: Option<CharacterVector>,
17+
}
18+
19+
impl ColumnNames {
20+
pub fn new(names: SEXP) -> Self {
21+
let names = if r_typeof(names) == STRSXP {
22+
Some(unsafe { CharacterVector::new_unchecked(names) })
23+
} else {
24+
None
25+
};
26+
Self { names }
27+
}
28+
29+
pub fn from_data_frame(x: SEXP) -> crate::Result<Self> {
30+
if !r_is_data_frame(x) {
31+
return Err(crate::anyhow!("`x` must be a data frame."));
32+
}
33+
let Some(column_names) = RObject::view(x).get_attribute_names() else {
34+
return Err(crate::anyhow!("`x` must have column names."));
35+
};
36+
Ok(Self::new(column_names.sexp))
37+
}
38+
39+
pub fn from_matrix(x: SEXP) -> crate::Result<Self> {
40+
if !r_is_matrix(x) {
41+
return Err(crate::anyhow!("`x` must be a matrix."));
42+
}
43+
let column_names = RFunction::from("colnames").add(x).call()?;
44+
Ok(Self::new(column_names.sexp))
45+
}
46+
47+
pub fn get_unchecked(&self, index: isize) -> Option<String> {
48+
if let Some(names) = &self.names {
49+
if let Some(name) = names.get_unchecked(index) {
50+
if name.len() > 0 {
51+
return Some(name);
52+
}
53+
}
54+
}
55+
None
56+
}
57+
}

0 commit comments

Comments
 (0)