1
1
use std:: sync:: Arc ;
2
2
3
- use arrow:: datatypes:: { DataType , Fields } ;
3
+ use arrow:: datatypes:: { DataType , SchemaRef } ;
4
4
use datafusion:: common:: { DataFusionError , TableReference } ;
5
5
use datafusion:: functions:: expr_fn:: concat;
6
6
use datafusion:: logical_expr:: { binary_expr, col as datafusion_col, lit} ;
@@ -61,6 +61,19 @@ impl From<&TableBlueprint> for DataFusionQueryData {
61
61
}
62
62
}
63
63
64
+ /// Result of the async datafusion query process.
65
+ #[ derive( Debug , Clone ) ]
66
+ pub struct DataFusionQueryResult {
67
+ /// The record batches to display.
68
+ pub sorbet_batches : Vec < SorbetBatch > ,
69
+
70
+ /// The schema of the record batches.
71
+ pub original_schema : SchemaRef ,
72
+
73
+ /// The migrated schema of the record batches (useful when the list of batches is empty).
74
+ pub sorbet_schema : re_sorbet:: SorbetSchema ,
75
+ }
76
+
64
77
/// A table blueprint along with the context required to execute the corresponding datafusion query.
65
78
#[ derive( Clone ) ]
66
79
struct DataFusionQuery {
@@ -83,12 +96,11 @@ impl DataFusionQuery {
83
96
}
84
97
}
85
98
86
- /// Execute the query and produce a vector of [`SorbetBatch`]s along with physical columns
87
- /// names.
99
+ /// Execute the query to produce the data to display.
88
100
///
89
101
/// Note: the future returned by this function must be `'static`, so it takes `self`. Use
90
102
/// `clone()` as required.
91
- async fn execute ( self ) -> Result < ( Vec < SorbetBatch > , Fields ) , DataFusionError > {
103
+ async fn execute ( self ) -> Result < DataFusionQueryResult , DataFusionError > {
92
104
let mut dataframe = self . session_ctx . table ( self . table_ref ) . await ?;
93
105
94
106
let DataFusionQueryData {
@@ -182,20 +194,16 @@ impl DataFusionQuery {
182
194
}
183
195
184
196
//
185
- // Collect
197
+ // Collect record batches
186
198
//
187
199
200
+ let original_schema = Arc :: clone ( dataframe. schema ( ) . inner ( ) ) ;
188
201
let record_batches = dataframe. collect ( ) . await ?;
189
202
190
- // TODO(#10421) IMPORTANT: fields must be copied here *before* converting to `SorbetBatch`,
191
- // because that conversion modifies the field names. As a result, the schema contained in a
192
- // `SorbetBatch` cannot be used to derive the physical column names as seen by DataFusion.
193
- let fields = record_batches
194
- . first ( )
195
- . map ( |record_batch| record_batch. schema ( ) . fields . clone ( ) )
196
- . unwrap_or_default ( ) ;
203
+ //
204
+ // Convert to `SorbetBatch`
205
+ //
197
206
198
- // convert to SorbetBatch
199
207
let sorbet_batches = record_batches
200
208
. iter ( )
201
209
. map ( |record_batch| {
@@ -204,7 +212,23 @@ impl DataFusionQuery {
204
212
. collect :: < Result < Vec < _ > , _ > > ( )
205
213
. map_err ( |err| DataFusionError :: External ( err. into ( ) ) ) ?;
206
214
207
- Ok ( ( sorbet_batches, fields) )
215
+ //
216
+ // Get (or create) `SorbetSchema`
217
+ //
218
+
219
+ let sorbet_schema = sorbet_batches
220
+ . first ( )
221
+ . map ( |batch| Ok ( batch. sorbet_schema ( ) . clone ( ) ) )
222
+ . unwrap_or_else ( || {
223
+ re_sorbet:: SorbetSchema :: try_from_raw_arrow_schema ( Arc :: clone ( & original_schema) )
224
+ . map_err ( |err| DataFusionError :: External ( err. into ( ) ) )
225
+ } ) ?;
226
+
227
+ Ok ( DataFusionQueryResult {
228
+ sorbet_batches,
229
+ original_schema,
230
+ sorbet_schema,
231
+ } )
208
232
}
209
233
}
210
234
@@ -222,8 +246,8 @@ impl PartialEq for DataFusionQuery {
222
246
}
223
247
}
224
248
225
- type RequestedSorbetBatches =
226
- RequestedObject < Result < ( Vec < SorbetBatch > , arrow :: datatypes :: Fields ) , DataFusionError > > ;
249
+ type RequestedDataFusionQueryResult =
250
+ RequestedObject < Result < DataFusionQueryResult , DataFusionError > > ;
227
251
228
252
/// Helper struct to manage the datafusion async query and the resulting `SorbetBatch`.
229
253
#[ derive( Clone ) ]
@@ -237,11 +261,11 @@ pub struct DataFusionAdapter {
237
261
query : DataFusionQuery ,
238
262
239
263
// Used to have something to display while the new dataframe is being queried.
240
- pub last_sorbet_batches : Option < ( Vec < SorbetBatch > , Fields ) > ,
264
+ pub last_query_results : Option < DataFusionQueryResult > ,
241
265
242
266
// TODO(ab, lucasmerlin): this `Mutex` is only needed because of the `Clone` bound in egui
243
267
// so we should clean that up if the bound is lifted.
244
- pub requested_sorbet_batches : Arc < Mutex < RequestedSorbetBatches > > ,
268
+ pub requested_query_result : Arc < Mutex < RequestedDataFusionQueryResult > > ,
245
269
246
270
pub queried_at : Timestamp ,
247
271
}
@@ -271,13 +295,13 @@ impl DataFusionAdapter {
271
295
let table_state = Self {
272
296
id,
273
297
blueprint : initial_blueprint,
274
- requested_sorbet_batches : Arc :: new ( Mutex :: new ( RequestedObject :: new_with_repaint (
298
+ requested_query_result : Arc :: new ( Mutex :: new ( RequestedObject :: new_with_repaint (
275
299
runtime,
276
300
ui. ctx ( ) . clone ( ) ,
277
301
query. clone ( ) . execute ( ) ,
278
302
) ) ) ,
279
303
query,
280
- last_sorbet_batches : None ,
304
+ last_query_results : None ,
281
305
queried_at : Timestamp :: now ( ) ,
282
306
} ;
283
307
@@ -288,7 +312,7 @@ impl DataFusionAdapter {
288
312
table_state
289
313
} ) ;
290
314
291
- adapter. requested_sorbet_batches . lock ( ) . on_frame_start ( ) ;
315
+ adapter. requested_query_result . lock ( ) . on_frame_start ( ) ;
292
316
293
317
adapter
294
318
}
@@ -314,10 +338,10 @@ impl DataFusionAdapter {
314
338
if self . query . query_data != new_query_data {
315
339
self . query . query_data = new_query_data;
316
340
317
- let mut dataframe = self . requested_sorbet_batches . lock ( ) ;
341
+ let mut dataframe = self . requested_query_result . lock ( ) ;
318
342
319
343
if let Some ( Ok ( sorbet_batches) ) = dataframe. try_as_ref ( ) {
320
- self . last_sorbet_batches = Some ( sorbet_batches. clone ( ) ) ;
344
+ self . last_query_results = Some ( sorbet_batches. clone ( ) ) ;
321
345
}
322
346
323
347
* dataframe = RequestedObject :: new_with_repaint (
0 commit comments