Skip to content

Commit 30df1c8

Browse files
authored
Fix row labels of data explorer (#247)
* Check bounds of request parameters * Subset data before materialising row names * Don't name `i` and `j` arguments to `[` Avoids these warnings: ``` named arguments other than 'drop' are discouraged ```
1 parent 847b1e6 commit 30df1c8

File tree

2 files changed

+103
-61
lines changed

2 files changed

+103
-61
lines changed

crates/ark/src/data_explorer/r_data_explorer.rs

Lines changed: 80 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -136,22 +136,33 @@ impl RDataExplorer {
136136
}
137137
}
138138

139-
fn handle_rpc(&self, req: DataExplorerBackendRequest) -> anyhow::Result<DataExplorerBackendReply> {
139+
fn handle_rpc(
140+
&self,
141+
req: DataExplorerBackendRequest,
142+
) -> anyhow::Result<DataExplorerBackendReply> {
140143
match req {
141144
DataExplorerBackendRequest::GetSchema(GetSchemaParams {
142145
start_index,
143146
num_columns,
144147
}) => {
145-
// TODO: Support for data frames with over 2B rows
146-
// TODO: Check bounds
147-
r_task(|| self.r_get_schema(start_index as i32, num_columns as i32))
148+
// TODO: Support for data frames with over 2B rows. Note that neither base R nor
149+
// tidyverse support long vectors in data frames, but data.table does.
150+
let num_columns: i32 = num_columns.try_into()?;
151+
let start_index: i32 = start_index.try_into()?;
152+
r_task(|| self.r_get_schema(start_index, num_columns))
148153
},
149154
DataExplorerBackendRequest::GetDataValues(GetDataValuesParams {
150155
row_start_index,
151156
num_rows,
152157
column_indices,
153158
}) => {
154-
// Fetch stringified data values and return
159+
// TODO: Support for data frames with over 2B rows
160+
let row_start_index: i32 = row_start_index.try_into()?;
161+
let num_rows: i32 = num_rows.try_into()?;
162+
let column_indices: Vec<i32> = column_indices
163+
.into_iter()
164+
.map(i32::try_from)
165+
.collect::<Result<Vec<i32>, _>>()?;
155166
r_task(|| self.r_get_data_values(row_start_index, num_rows, column_indices))
156167
},
157168
DataExplorerBackendRequest::SetSortColumns(SetSortColumnsParams { sort_keys: _ }) => {
@@ -239,66 +250,74 @@ impl RDataExplorer {
239250

240251
fn r_get_data_values(
241252
&self,
242-
row_start_index: i64,
243-
num_rows: i64,
244-
column_indices: Vec<i64>,
253+
row_start_index: i32,
254+
num_rows: i32,
255+
column_indices: Vec<i32>,
245256
) -> anyhow::Result<DataExplorerBackendReply> {
246-
unsafe {
247-
let table = self.table.get().clone();
248-
let object = *table;
249-
250-
let harp::TableInfo {
251-
kind,
252-
dims:
253-
harp::TableDim {
254-
num_rows: total_num_rows,
255-
num_cols: total_num_columns,
256-
},
257-
..
258-
} = harp::table_info(object)?;
259-
260-
let total_num_rows = total_num_rows as i64;
261-
262-
let lower_bound = cmp::min(row_start_index, total_num_rows) as isize;
263-
let upper_bound = cmp::min(row_start_index + num_rows, total_num_rows) as isize;
264-
265-
let mut column_data: Vec<Vec<String>> = Vec::new();
266-
for column_index in column_indices {
267-
let column_index = column_index as i32;
268-
if column_index >= total_num_columns {
269-
// For now we skip any columns requested beyond last one
270-
break;
271-
}
272-
273-
let column = if let harp::TableKind::Dataframe = kind {
274-
RObject::from(VECTOR_ELT(object, column_index as isize))
275-
} else {
276-
RFunction::new("base", "[")
277-
.add(object)
278-
.param("i", R_MissingArg)
279-
.param("j", column_index + 1)
280-
.call()?
281-
};
282-
let formatter = FormattedVector::new(*column)?;
283-
284-
let mut formatted_data = Vec::new();
285-
for i in lower_bound..upper_bound {
286-
formatted_data.push(formatter.get_unchecked(i));
287-
}
288-
column_data.push(formatted_data);
289-
}
257+
let table = self.table.get().clone();
258+
let object = *table;
259+
260+
let harp::TableInfo {
261+
dims:
262+
harp::TableDim {
263+
num_rows: total_num_rows,
264+
num_cols: total_num_cols,
265+
},
266+
..
267+
} = harp::table_info(object)?;
268+
269+
let lower_bound = cmp::min(row_start_index, total_num_rows) as isize;
270+
let upper_bound = cmp::min(row_start_index + num_rows, total_num_rows) as isize;
271+
272+
// Create R indices
273+
let cols_r_idx: Vec<i32> = column_indices
274+
.into_iter()
275+
// For now we skip any columns requested beyond last one
276+
.filter(|x| *x < total_num_cols)
277+
.map(|x| x + 1)
278+
.collect();
279+
let cols_r_idx: RObject = cols_r_idx.try_into()?;
280+
let num_cols = cols_r_idx.length() as i32;
281+
282+
let rows_r_idx = RFunction::new("base", ":")
283+
.add((lower_bound + 1) as i32)
284+
.add((upper_bound + 1) as i32)
285+
.call()?;
286+
287+
// Subset rows in advance, including unmaterialized row names. Also
288+
// subset spend time creating subsetting columns that we don't need.
289+
// Supports dispatch and should be vectorised in most implementations.
290+
let object = RFunction::new("base", "[")
291+
.add(object)
292+
.add(rows_r_idx.sexp)
293+
.add(cols_r_idx.sexp)
294+
.param("drop", false)
295+
.call()?;
296+
297+
let mut column_data: Vec<Vec<String>> = Vec::new();
298+
for i in 0..num_cols {
299+
let column = RFunction::new("base", "[")
300+
.add(object.clone())
301+
.add(unsafe { R_MissingArg })
302+
.add(i + 1)
303+
.param("drop", true)
304+
.call()?;
305+
306+
let formatter = FormattedVector::new(*column)?;
307+
let formatted = formatter.iter().collect();
308+
309+
column_data.push(formatted);
310+
}
290311

291-
let row_names = RFunction::new("base", "row.names").add(object).call()?;
292-
let row_labels = RFunction::new("base", "format").add(row_names).call()?;
293-
let row_labels: Vec<String> = row_labels.try_into()?;
312+
let row_names = RFunction::new("base", "row.names").add(object).call()?;
313+
let row_labels: Vec<String> = row_names.try_into()?;
294314

295-
let response = TableData {
296-
columns: column_data,
297-
row_labels: Some(vec![row_labels]),
298-
};
315+
let response = TableData {
316+
columns: column_data,
317+
row_labels: Some(vec![row_labels]),
318+
};
299319

300-
Ok(DataExplorerBackendReply::GetDataValuesReply(response))
301-
}
320+
Ok(DataExplorerBackendReply::GetDataValuesReply(response))
302321
}
303322
}
304323

crates/harp/src/object.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,29 @@ impl TryFrom<RObject> for Vec<Option<String>> {
696696
}
697697
}
698698

699+
impl TryFrom<Vec<i32>> for RObject {
700+
type Error = crate::error::Error;
701+
fn try_from(value: Vec<i32>) -> Result<Self, Self::Error> {
702+
unsafe {
703+
let n = value.len();
704+
705+
let out_raw = Rf_allocVector(INTSXP, n as R_xlen_t);
706+
let out = RObject::new(out_raw);
707+
let v_out = DATAPTR(out_raw) as *mut i32;
708+
709+
for i in 0..n {
710+
let x = value[i];
711+
if x == R_NaInt {
712+
return Err(crate::Error::MissingValueError);
713+
}
714+
*(v_out.offset(i as isize)) = x;
715+
}
716+
717+
return Ok(out);
718+
}
719+
}
720+
}
721+
699722
impl TryFrom<RObject> for HashMap<String, String> {
700723
type Error = crate::error::Error;
701724
fn try_from(value: RObject) -> Result<Self, Self::Error> {

0 commit comments

Comments
 (0)