Skip to content

Commit 9fd706b

Browse files
authored
Layout reader updates (#3400)
See #3382

---------

Signed-off-by: Nicholas Gates <[email protected]>
1 parent b948d5f commit 9fd706b

File tree

19 files changed

+232
-260
lines changed

19 files changed

+232
-260
lines changed

vortex-layout/src/layout.rs

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
use std::any::Any;
2-
use std::collections::BTreeSet;
32
use std::fmt::{Debug, Formatter};
43
use std::sync::Arc;
54

65
use arcref::ArcRef;
76
use itertools::Itertools;
87
use vortex_array::{ArrayContext, SerializeMetadata};
9-
use vortex_dtype::{DType, FieldMask, FieldName};
8+
use vortex_dtype::{DType, FieldName};
109
use vortex_error::{VortexExpect, VortexResult, vortex_err};
1110

1211
use crate::segments::{SegmentId, SegmentSource};
@@ -48,13 +47,6 @@ pub trait Layout: 'static + Send + Sync + Debug + private::Sealed {
4847
/// Get the segment IDs for this layout.
4948
fn segment_ids(&self) -> Vec<SegmentId>;
5049

51-
fn register_splits(
52-
&self,
53-
field_mask: &[FieldMask],
54-
row_offset: u64,
55-
splits: &mut BTreeSet<u64>,
56-
) -> VortexResult<()>;
57-
5850
fn new_reader(
5951
&self,
6052
name: &Arc<str>,
@@ -249,15 +241,6 @@ impl<V: VTable> Layout for LayoutAdapter<V> {
249241
V::segment_ids(&self.0)
250242
}
251243

252-
fn register_splits(
253-
&self,
254-
field_mask: &[FieldMask],
255-
row_offset: u64,
256-
splits: &mut BTreeSet<u64>,
257-
) -> VortexResult<()> {
258-
V::register_splits(&self.0, field_mask, row_offset, splits)
259-
}
260-
261244
fn new_reader(
262245
&self,
263246
name: &Arc<str>,

vortex-layout/src/layouts/chunked/mod.rs

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
mod reader;
22
pub mod writer;
33

4-
use std::collections::BTreeSet;
54
use std::sync::Arc;
65

76
use vortex_array::{ArrayContext, DeserializeMetadata, EmptyMetadata};
8-
use vortex_dtype::{DType, FieldMask};
7+
use vortex_dtype::DType;
98
use vortex_error::VortexResult;
109

1110
use crate::children::LayoutChildren;
@@ -58,22 +57,6 @@ impl VTable for ChunkedVTable {
5857
LayoutChildType::Chunk((idx, layout.chunk_offsets[idx]))
5958
}
6059

61-
fn register_splits(
62-
layout: &Self::Layout,
63-
field_mask: &[FieldMask],
64-
row_offset: u64,
65-
splits: &mut BTreeSet<u64>,
66-
) -> VortexResult<()> {
67-
let mut offset = row_offset;
68-
for i in 0..layout.nchildren() {
69-
let child = layout.child(i)?;
70-
child.register_splits(field_mask, offset, splits)?;
71-
offset += child.row_count();
72-
splits.insert(offset);
73-
}
74-
Ok(())
75-
}
76-
7760
fn new_reader(
7861
layout: &Self::Layout,
7962
name: &Arc<str>,

vortex-layout/src/layouts/chunked/reader.rs

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
use std::ops::{Deref, Range};
1+
use std::collections::BTreeSet;
2+
use std::ops::Range;
23
use std::sync::Arc;
34

45
use async_trait::async_trait;
@@ -7,8 +8,9 @@ use futures::stream::FuturesOrdered;
78
use futures::{FutureExt, TryStreamExt};
89
use itertools::Itertools;
910
use vortex_array::arrays::ChunkedArray;
11+
use vortex_array::stats::Precision;
1012
use vortex_array::{ArrayContext, ArrayRef};
11-
use vortex_dtype::DType;
13+
use vortex_dtype::{DType, FieldMask};
1214
use vortex_error::{VortexExpect, VortexResult};
1315
use vortex_expr::ExprRef;
1416
use vortex_mask::Mask;
@@ -17,7 +19,7 @@ use crate::layouts::chunked::ChunkedLayout;
1719
use crate::reader::LayoutReader;
1820
use crate::segments::SegmentSource;
1921
use crate::{
20-
ArrayEvaluation, Layout, LayoutReaderRef, LazyReaderChildren, MaskEvaluation, PruningEvaluation,
22+
ArrayEvaluation, LayoutReaderRef, LazyReaderChildren, MaskEvaluation, PruningEvaluation,
2123
};
2224

2325
/// A [`LayoutReader`] for chunked layouts.
@@ -111,19 +113,35 @@ impl ChunkedReader {
111113
}
112114
}
113115

114-
impl Deref for ChunkedReader {
115-
type Target = dyn Layout;
116-
117-
fn deref(&self) -> &Self::Target {
118-
self.layout.as_ref()
119-
}
120-
}
121-
122116
impl LayoutReader for ChunkedReader {
123117
fn name(&self) -> &Arc<str> {
124118
&self.name
125119
}
126120

121+
fn dtype(&self) -> &DType {
122+
self.layout.dtype()
123+
}
124+
125+
fn row_count(&self) -> Precision<u64> {
126+
Precision::Exact(self.layout.row_count())
127+
}
128+
129+
fn register_splits(
130+
&self,
131+
field_mask: &[FieldMask],
132+
row_offset: u64,
133+
splits: &mut BTreeSet<u64>,
134+
) -> VortexResult<()> {
135+
let mut offset = row_offset;
136+
for i in 0..self.layout.nchildren() {
137+
let child = self.chunk_reader(i)?;
138+
child.register_splits(field_mask, offset, splits)?;
139+
offset += self.layout.child(i)?.row_count();
140+
splits.insert(offset);
141+
}
142+
Ok(())
143+
}
144+
127145
fn pruning_evaluation(
128146
&self,
129147
row_range: &Range<u64>,

vortex-layout/src/layouts/dict/mod.rs

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
mod reader;
22
pub mod writer;
33

4-
use std::collections::BTreeSet;
54
use std::sync::Arc;
65

76
use reader::DictReader;
87
use vortex_array::{ArrayContext, DeserializeMetadata, ProstMetadata};
9-
use vortex_dtype::{DType, FieldMask, PType};
8+
use vortex_dtype::{DType, PType};
109
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic};
1110

1211
use crate::children::LayoutChildren;
@@ -68,15 +67,6 @@ impl VTable for DictVTable {
6867
}
6968
}
7069

71-
fn register_splits(
72-
layout: &Self::Layout,
73-
field_mask: &[FieldMask],
74-
row_offset: u64,
75-
splits: &mut BTreeSet<u64>,
76-
) -> VortexResult<()> {
77-
layout.codes.register_splits(field_mask, row_offset, splits)
78-
}
79-
8070
fn new_reader(
8171
layout: &Self::Layout,
8272
name: &Arc<str>,

vortex-layout/src/layouts/dict/reader.rs

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
1-
use std::ops::{BitAnd, Deref, Range};
1+
use std::collections::BTreeSet;
2+
use std::ops::{BitAnd, Range};
23
use std::sync::{Arc, OnceLock};
34

45
use async_trait::async_trait;
56
use dashmap::DashMap;
67
use futures::{FutureExt, join};
78
use vortex_array::arrays::StructArray;
89
use vortex_array::compute::{MinMaxResult, filter, min_max};
10+
use vortex_array::stats::Precision;
911
use vortex_array::{Array, ArrayContext, ArrayRef, ToCanonical};
1012
use vortex_dict::DictArray;
13+
use vortex_dtype::{DType, FieldMask};
1114
use vortex_error::{VortexExpect, VortexResult};
1215
use vortex_expr::{ExprRef, Identity};
1316
use vortex_mask::Mask;
@@ -16,7 +19,7 @@ use super::DictLayout;
1619
use crate::layouts::SharedArrayFuture;
1720
use crate::segments::SegmentSource;
1821
use crate::{
19-
ArrayEvaluation, Layout, LayoutReader, LayoutReaderRef, MaskEvaluation, NoOpPruningEvaluation,
22+
ArrayEvaluation, LayoutReader, LayoutReaderRef, MaskEvaluation, NoOpPruningEvaluation,
2023
PruningEvaluation,
2124
};
2225

@@ -25,6 +28,8 @@ pub struct DictReader {
2528
#[allow(dead_code)] // Typically used for logging
2629
name: Arc<str>,
2730

31+
/// Length of the values array
32+
values_len: usize,
2833
/// Cached dict values array
2934
values_array: OnceLock<SharedArrayFuture>,
3035
/// Cache of expression evaluation results on the values array by expression
@@ -34,21 +39,14 @@ pub struct DictReader {
3439
codes: LayoutReaderRef,
3540
}
3641

37-
impl Deref for DictReader {
38-
type Target = dyn Layout;
39-
40-
fn deref(&self) -> &Self::Target {
41-
self.layout.deref()
42-
}
43-
}
44-
4542
impl DictReader {
4643
pub(super) fn try_new(
4744
layout: DictLayout,
4845
name: Arc<str>,
4946
segment_source: &Arc<dyn SegmentSource>,
5047
ctx: &ArrayContext,
5148
) -> VortexResult<Self> {
49+
let values_len = usize::try_from(layout.values.row_count())?;
5250
let values =
5351
layout
5452
.values
@@ -61,6 +59,7 @@ impl DictReader {
6159
Ok(Self {
6260
layout,
6361
name,
62+
values_len,
6463
values_array: Default::default(),
6564
values_evals: Default::default(),
6665
values,
@@ -71,21 +70,18 @@ impl DictReader {
7170
fn values_array(&self) -> SharedArrayFuture {
7271
// We capture the name, so it may be wrong if we re-use the same reader within multiple
7372
// different parent readers. But that's rare...
73+
let values_len = self.values_len;
7474
self.values_array
7575
.get_or_init(move || {
76-
let values_len = self.values.row_count();
7776
let eval = self
7877
.values
79-
.projection_evaluation(&(0..values_len), &Identity::new_expr())
78+
.projection_evaluation(&(0..values_len as u64), &Identity::new_expr())
8079
.vortex_expect("must construct dict values array evaluation");
8180

8281
async move {
83-
eval.invoke(Mask::new_true(
84-
usize::try_from(values_len)
85-
.vortex_expect("dict values length must fit in u32"),
86-
))
87-
.await
88-
.map_err(Arc::new)
82+
eval.invoke(Mask::new_true(values_len))
83+
.await
84+
.map_err(Arc::new)
8985
}
9086
.boxed()
9187
.shared()
@@ -111,6 +107,23 @@ impl LayoutReader for DictReader {
111107
&self.name
112108
}
113109

110+
fn dtype(&self) -> &DType {
111+
self.layout.dtype()
112+
}
113+
114+
fn row_count(&self) -> Precision<u64> {
115+
Precision::Exact(self.layout.row_count())
116+
}
117+
118+
fn register_splits(
119+
&self,
120+
field_mask: &[FieldMask],
121+
row_offset: u64,
122+
splits: &mut BTreeSet<u64>,
123+
) -> VortexResult<()> {
124+
self.codes.register_splits(field_mask, row_offset, splits)
125+
}
126+
114127
fn pruning_evaluation(
115128
&self,
116129
_row_range: &Range<u64>,

vortex-layout/src/layouts/filter.rs

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
use std::collections::BTreeSet;
12
use std::iter;
2-
use std::ops::{BitAnd, Deref, Range};
3+
use std::ops::{BitAnd, Range};
34
use std::sync::Arc;
45

56
use async_trait::async_trait;
@@ -8,14 +9,14 @@ use dashmap::DashMap;
89
use itertools::Itertools;
910
use parking_lot::RwLock;
1011
use sketches_ddsketch::DDSketch;
12+
use vortex_array::stats::Precision;
13+
use vortex_dtype::{DType, FieldMask};
1114
use vortex_error::{VortexExpect, VortexResult, vortex_err, vortex_panic};
1215
use vortex_expr::ExprRef;
1316
use vortex_expr::forms::cnf::cnf;
1417
use vortex_mask::Mask;
1518

16-
use crate::{
17-
ArrayEvaluation, Layout, LayoutReader, LayoutReaderRef, MaskEvaluation, PruningEvaluation,
18-
};
19+
use crate::{ArrayEvaluation, LayoutReader, LayoutReaderRef, MaskEvaluation, PruningEvaluation};
1920

2021
/// The selectivity histogram quantile to use for reordering conjuncts. Where 0 == no rows match.
2122
const DEFAULT_SELECTIVITY_QUANTILE: f64 = 0.1;
@@ -40,19 +41,29 @@ impl FilterLayoutReader {
4041
}
4142
}
4243

43-
impl Deref for FilterLayoutReader {
44-
type Target = dyn Layout;
45-
46-
fn deref(&self) -> &Self::Target {
47-
self.child.deref()
48-
}
49-
}
50-
5144
impl LayoutReader for FilterLayoutReader {
5245
fn name(&self) -> &Arc<str> {
5346
self.child.name()
5447
}
5548

49+
fn dtype(&self) -> &DType {
50+
self.child.dtype()
51+
}
52+
53+
fn row_count(&self) -> Precision<u64> {
54+
self.child.row_count()
55+
}
56+
57+
fn register_splits(
58+
&self,
59+
field_mask: &[FieldMask],
60+
row_offset: u64,
61+
splits: &mut BTreeSet<u64>,
62+
) -> VortexResult<()> {
63+
// Pass-through the splits to the child layout reader.
64+
self.child.register_splits(field_mask, row_offset, splits)
65+
}
66+
5667
fn pruning_evaluation(
5768
&self,
5869
row_range: &Range<u64>,

vortex-layout/src/layouts/flat/mod.rs

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
mod reader;
22
pub mod writer;
33

4-
use std::collections::BTreeSet;
54
use std::sync::Arc;
65

76
use vortex_array::{ArrayContext, DeserializeMetadata, EmptyMetadata};
8-
use vortex_dtype::{DType, FieldMask};
7+
use vortex_dtype::DType;
98
use vortex_error::{VortexResult, vortex_bail, vortex_panic};
109

1110
use crate::children::LayoutChildren;
@@ -58,21 +57,6 @@ impl VTable for FlatVTable {
5857
vortex_panic!("Flat layout has no children");
5958
}
6059

61-
fn register_splits(
62-
layout: &Self::Layout,
63-
field_mask: &[FieldMask],
64-
row_offset: u64,
65-
splits: &mut BTreeSet<u64>,
66-
) -> VortexResult<()> {
67-
for path in field_mask {
68-
if path.matches_root() {
69-
splits.insert(row_offset + layout.row_count());
70-
break;
71-
}
72-
}
73-
Ok(())
74-
}
75-
7660
fn new_reader(
7761
layout: &Self::Layout,
7862
name: &Arc<str>,

0 commit comments

Comments
 (0)