Skip to content

Commit 570d987

Browse files
committed
EXPERIMENTAL COMMIT: always canonicalize dict values eagerly
Signed-off-by: Andrew Duffy <[email protected]>
1 parent 988e4ff commit 570d987

File tree

5 files changed

+10
-1
lines changed

5 files changed

+10
-1
lines changed

vortex-datafusion/src/persistent/opener.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ impl FileOpener for VortexOpener {
251251
FieldName::from(field.name().as_str())
252252
})
253253
.collect::<Vec<_>>();
254+
// println!("FIELDS: {fields:?}");
254255
let projection_expr = select(fields, root());
255256

256257
// We share our layout readers with other partitions in the scan, so we only need to read each layout in each file once.

vortex-datafusion/src/persistent/source.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ impl FileSource for VortexSource {
133133
};
134134

135135
let projection = base_config.file_column_projection_indices().map(Arc::from);
136+
// println!("CREATE FILE OPENER: projection {projection:?}");
136137

137138
let opener = VortexOpener {
138139
object_store,

vortex-layout/src/layouts/dict/reader.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use std::sync::{Arc, OnceLock};
88
use futures::future::BoxFuture;
99
use futures::{FutureExt, TryFutureExt, try_join};
1010
use vortex_array::compute::{MinMaxResult, min_max, take};
11-
use vortex_array::{ArrayRef, MaskFuture};
11+
use vortex_array::{Array, ArrayRef, IntoArray, MaskFuture};
1212
use vortex_dict::DictArray;
1313
use vortex_dtype::{DType, FieldMask};
1414
use vortex_error::{VortexError, VortexExpect, VortexResult};
@@ -76,6 +76,7 @@ impl DictReader {
7676
)
7777
.vortex_expect("must construct dict values array evaluation")
7878
.map_err(Arc::new)
79+
.map_ok(|arr| arr.to_canonical().into_array())
7980
.boxed()
8081
.shared()
8182
})

vortex-scan/src/split_by.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ impl SplitBy {
3939
// Register all splits in the row range for all layouts that are needed
4040
// to read the field mask.
4141
layout_reader.register_splits(field_mask, row_range, &mut row_splits)?;
42+
// Iterate the splits, show how large they are on average
43+
//for (&start, &end) in row_splits.iter().tuples() {
44+
// println!("split size: {}", end - start);
45+
//}
4246
row_splits
4347
}
4448
SplitBy::RowCount(n) => row_range

vortex-scan/src/tasks.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ pub(super) fn split_exec<A: 'static + Send>(
4545
return Ok(ok(None).boxed());
4646
}
4747

48+
// println!("row_range: {}", row_range.end - row_range.start);
49+
4850
let filter_mask = match ctx.filter.as_ref() {
4951
// No filter == immediate mask
5052
None => {

0 commit comments

Comments
 (0)