@@ -136,22 +136,33 @@ impl RDataExplorer {
136
136
}
137
137
}
138
138
139
- fn handle_rpc ( & self , req : DataExplorerBackendRequest ) -> anyhow:: Result < DataExplorerBackendReply > {
139
+ fn handle_rpc (
140
+ & self ,
141
+ req : DataExplorerBackendRequest ,
142
+ ) -> anyhow:: Result < DataExplorerBackendReply > {
140
143
match req {
141
144
DataExplorerBackendRequest :: GetSchema ( GetSchemaParams {
142
145
start_index,
143
146
num_columns,
144
147
} ) => {
145
- // TODO: Support for data frames with over 2B rows
146
- // TODO: Check bounds
147
- r_task ( || self . r_get_schema ( start_index as i32 , num_columns as i32 ) )
148
+ // TODO: Support for data frames with over 2B rows. Note that neither base R nor
149
+ // tidyverse support long vectors in data frames, but data.table does.
150
+ let num_columns: i32 = num_columns. try_into ( ) ?;
151
+ let start_index: i32 = start_index. try_into ( ) ?;
152
+ r_task ( || self . r_get_schema ( start_index, num_columns) )
148
153
} ,
149
154
DataExplorerBackendRequest :: GetDataValues ( GetDataValuesParams {
150
155
row_start_index,
151
156
num_rows,
152
157
column_indices,
153
158
} ) => {
154
- // Fetch stringified data values and return
159
+ // TODO: Support for data frames with over 2B rows
160
+ let row_start_index: i32 = row_start_index. try_into ( ) ?;
161
+ let num_rows: i32 = num_rows. try_into ( ) ?;
162
+ let column_indices: Vec < i32 > = column_indices
163
+ . into_iter ( )
164
+ . map ( i32:: try_from)
165
+ . collect :: < Result < Vec < i32 > , _ > > ( ) ?;
155
166
r_task ( || self . r_get_data_values ( row_start_index, num_rows, column_indices) )
156
167
} ,
157
168
DataExplorerBackendRequest :: SetSortColumns ( SetSortColumnsParams { sort_keys : _ } ) => {
@@ -239,66 +250,74 @@ impl RDataExplorer {
239
250
240
251
fn r_get_data_values (
241
252
& self ,
242
- row_start_index : i64 ,
243
- num_rows : i64 ,
244
- column_indices : Vec < i64 > ,
253
+ row_start_index : i32 ,
254
+ num_rows : i32 ,
255
+ column_indices : Vec < i32 > ,
245
256
) -> anyhow:: Result < DataExplorerBackendReply > {
246
- unsafe {
247
- let table = self . table . get ( ) . clone ( ) ;
248
- let object = * table;
249
-
250
- let harp:: TableInfo {
251
- kind,
252
- dims :
253
- harp:: TableDim {
254
- num_rows : total_num_rows,
255
- num_cols : total_num_columns,
256
- } ,
257
- ..
258
- } = harp:: table_info ( object) ?;
259
-
260
- let total_num_rows = total_num_rows as i64 ;
261
-
262
- let lower_bound = cmp:: min ( row_start_index, total_num_rows) as isize ;
263
- let upper_bound = cmp:: min ( row_start_index + num_rows, total_num_rows) as isize ;
264
-
265
- let mut column_data: Vec < Vec < String > > = Vec :: new ( ) ;
266
- for column_index in column_indices {
267
- let column_index = column_index as i32 ;
268
- if column_index >= total_num_columns {
269
- // For now we skip any columns requested beyond last one
270
- break ;
271
- }
272
-
273
- let column = if let harp:: TableKind :: Dataframe = kind {
274
- RObject :: from ( VECTOR_ELT ( object, column_index as isize ) )
275
- } else {
276
- RFunction :: new ( "base" , "[" )
277
- . add ( object)
278
- . param ( "i" , R_MissingArg )
279
- . param ( "j" , column_index + 1 )
280
- . call ( ) ?
281
- } ;
282
- let formatter = FormattedVector :: new ( * column) ?;
283
-
284
- let mut formatted_data = Vec :: new ( ) ;
285
- for i in lower_bound..upper_bound {
286
- formatted_data. push ( formatter. get_unchecked ( i) ) ;
287
- }
288
- column_data. push ( formatted_data) ;
289
- }
257
+ let table = self . table . get ( ) . clone ( ) ;
258
+ let object = * table;
259
+
260
+ let harp:: TableInfo {
261
+ dims :
262
+ harp:: TableDim {
263
+ num_rows : total_num_rows,
264
+ num_cols : total_num_cols,
265
+ } ,
266
+ ..
267
+ } = harp:: table_info ( object) ?;
268
+
269
+ let lower_bound = cmp:: min ( row_start_index, total_num_rows) as isize ;
270
+ let upper_bound = cmp:: min ( row_start_index + num_rows, total_num_rows) as isize ;
271
+
272
+ // Create R indices
273
+ let cols_r_idx: Vec < i32 > = column_indices
274
+ . into_iter ( )
275
+ // For now we skip any columns requested beyond last one
276
+ . filter ( |x| * x < total_num_cols)
277
+ . map ( |x| x + 1 )
278
+ . collect ( ) ;
279
+ let cols_r_idx: RObject = cols_r_idx. try_into ( ) ?;
280
+ let num_cols = cols_r_idx. length ( ) as i32 ;
281
+
282
+ let rows_r_idx = RFunction :: new ( "base" , ":" )
283
+ . add ( ( lower_bound + 1 ) as i32 )
284
+ . add ( ( upper_bound + 1 ) as i32 )
285
+ . call ( ) ?;
286
+
287
+ // Subset rows in advance, including unmaterialized row names. Also
288
+ // subset spend time creating subsetting columns that we don't need.
289
+ // Supports dispatch and should be vectorised in most implementations.
290
+ let object = RFunction :: new ( "base" , "[" )
291
+ . add ( object)
292
+ . add ( rows_r_idx. sexp )
293
+ . add ( cols_r_idx. sexp )
294
+ . param ( "drop" , false )
295
+ . call ( ) ?;
296
+
297
+ let mut column_data: Vec < Vec < String > > = Vec :: new ( ) ;
298
+ for i in 0 ..num_cols {
299
+ let column = RFunction :: new ( "base" , "[" )
300
+ . add ( object. clone ( ) )
301
+ . add ( unsafe { R_MissingArg } )
302
+ . add ( i + 1 )
303
+ . param ( "drop" , true )
304
+ . call ( ) ?;
305
+
306
+ let formatter = FormattedVector :: new ( * column) ?;
307
+ let formatted = formatter. iter ( ) . collect ( ) ;
308
+
309
+ column_data. push ( formatted) ;
310
+ }
290
311
291
- let row_names = RFunction :: new ( "base" , "row.names" ) . add ( object) . call ( ) ?;
292
- let row_labels = RFunction :: new ( "base" , "format" ) . add ( row_names) . call ( ) ?;
293
- let row_labels: Vec < String > = row_labels. try_into ( ) ?;
312
+ let row_names = RFunction :: new ( "base" , "row.names" ) . add ( object) . call ( ) ?;
313
+ let row_labels: Vec < String > = row_names. try_into ( ) ?;
294
314
295
- let response = TableData {
296
- columns : column_data,
297
- row_labels : Some ( vec ! [ row_labels] ) ,
298
- } ;
315
+ let response = TableData {
316
+ columns : column_data,
317
+ row_labels : Some ( vec ! [ row_labels] ) ,
318
+ } ;
299
319
300
- Ok ( DataExplorerBackendReply :: GetDataValuesReply ( response) )
301
- }
320
+ Ok ( DataExplorerBackendReply :: GetDataValuesReply ( response) )
302
321
}
303
322
}
304
323
0 commit comments