Skip to content

Commit ff65709

Browse files
committed
LazyReaderChildren only allocate once
Signed-off-by: Andrew Duffy <[email protected]>
1 parent 592d6e9 commit ff65709

File tree

4 files changed

+54
-40
lines changed

4 files changed

+54
-40
lines changed

vortex-layout/src/layouts/chunked/reader.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use std::sync::Arc;
77

88
use futures::future::BoxFuture;
99
use futures::stream::FuturesOrdered;
10-
use futures::{FutureExt, TryStreamExt};
10+
use futures::{FutureExt, StreamExt, TryStreamExt};
1111
use vortex_array::arrays::ChunkedArray;
1212
use vortex_array::{ArrayRef, MaskFuture};
1313
use vortex_dtype::{DType, FieldMask};
@@ -43,7 +43,12 @@ impl ChunkedReader {
4343
}
4444
chunk_offsets[nchildren] = layout.row_count();
4545

46-
let lazy_children = LazyReaderChildren::new(layout.children.clone(), segment_source);
46+
let dtypes = vec![layout.dtype.clone(); nchildren];
47+
let names = (0..nchildren)
48+
.map(|idx| Arc::from(format!("{name}.[{idx}]")))
49+
.collect();
50+
let lazy_children =
51+
LazyReaderChildren::new(layout.children.clone(), dtypes, names, segment_source);
4752

4853
Self {
4954
layout,
@@ -55,11 +60,7 @@ impl ChunkedReader {
5560

5661
/// Return the [`LayoutReader`] for the given chunk.
5762
fn chunk_reader(&self, idx: usize) -> VortexResult<&LayoutReaderRef> {
58-
self.lazy_children.get(
59-
idx,
60-
self.layout.dtype(),
61-
&format!("{}.[{}]", self.name, idx).into(),
62-
)
63+
self.lazy_children.get(idx)
6364
}
6465

6566
fn chunk_offset(&self, idx: usize) -> u64 {

vortex-layout/src/layouts/struct_/reader.rs

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,24 @@ impl StructReader {
5858
.collect()
5959
});
6060

61-
let lazy_children =
62-
LazyReaderChildren::new(layout.children.clone(), segment_source.clone());
61+
let mut dtypes: Vec<DType> = struct_dt.fields().collect();
62+
let mut names: Vec<Arc<str>> = struct_dt
63+
.names()
64+
.iter()
65+
.map(|x| x.inner().clone())
66+
.collect();
67+
68+
if layout.dtype.is_nullable() {
69+
dtypes.insert(0, DType::Bool(Nullability::NonNullable));
70+
names.insert(0, Arc::from("validity"));
71+
}
72+
73+
let lazy_children = LazyReaderChildren::new(
74+
layout.children.clone(),
75+
dtypes,
76+
names,
77+
segment_source.clone(),
78+
);
6379

6480
// Create an expanded root expression that contains all fields of the struct.
6581
let expanded_root_expr = replace_root_fields(root(), struct_dt);
@@ -105,24 +121,14 @@ impl StructReader {
105121
.field_by_index(idx)
106122
.ok_or_else(|| vortex_err!("Missing field {idx}"))?;
107123
let name = &self.struct_fields().names()[idx];
108-
self.lazy_children.get(
109-
child_index,
110-
&field_dtype,
111-
&format!("{}.{}", self.name, name).into(),
112-
)
124+
self.lazy_children.get(child_index)
113125
}
114126

115127
/// Return the reader for the struct validity, if present
116128
fn validity(&self) -> VortexResult<Option<&LayoutReaderRef>> {
117129
self.dtype()
118130
.is_nullable()
119-
.then(|| {
120-
self.lazy_children.get(
121-
0,
122-
&DType::Bool(Nullability::NonNullable),
123-
&"validity".into(),
124-
)
125-
})
131+
.then(|| self.lazy_children.get(0))
126132
.transpose()
127133
}
128134

vortex-layout/src/layouts/zoned/reader.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,17 @@ impl ZonedReader {
5050
name: Arc<str>,
5151
segment_source: Arc<dyn SegmentSource>,
5252
) -> VortexResult<Self> {
53-
let lazy_children =
54-
LazyReaderChildren::new(layout.children.clone(), segment_source.clone());
53+
let dtypes = vec![
54+
layout.dtype.clone(),
55+
ZoneMap::dtype_for_stats_table(layout.dtype(), layout.present_stats()),
56+
];
57+
let names = vec![name.clone(), format!("{}.zones", name).into()];
58+
let lazy_children = LazyReaderChildren::new(
59+
layout.children.clone(),
60+
dtypes,
61+
names,
62+
segment_source.clone(),
63+
);
5564

5665
Ok(Self {
5766
layout,
@@ -65,7 +74,7 @@ impl ZonedReader {
6574

6675
#[inline]
6776
fn data_child(&self) -> VortexResult<&LayoutReaderRef> {
68-
self.lazy_children.get(0, self.layout.dtype(), &self.name)
77+
self.lazy_children.get(0)
6978
}
7079

7180
/// Get or create the pruning predicate for a given expression.
@@ -97,14 +106,7 @@ impl ZonedReader {
97106

98107
let zones_eval = self
99108
.lazy_children
100-
.get(
101-
1,
102-
&ZoneMap::dtype_for_stats_table(
103-
self.layout.dtype(),
104-
self.layout.present_stats(),
105-
),
106-
&format!("{}.zones", self.name).into(),
107-
)
109+
.get(1)
108110
.vortex_expect("failed to get zone child")
109111
.projection_evaluation(
110112
&(0..nzones as u64),

vortex-layout/src/reader.rs

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -101,36 +101,41 @@ impl ArrayFutureExt for ArrayFuture {
101101

102102
pub struct LazyReaderChildren {
103103
children: Arc<dyn LayoutChildren>,
104+
dtypes: Vec<DType>,
105+
names: Vec<Arc<str>>,
104106
segment_source: Arc<dyn SegmentSource>,
105107

106108
// TODO(ngates): we may want a hash map of some sort here?
107109
cache: Vec<OnceCell<LayoutReaderRef>>,
108110
}
109111

110112
impl LazyReaderChildren {
111-
pub fn new(children: Arc<dyn LayoutChildren>, segment_source: Arc<dyn SegmentSource>) -> Self {
113+
pub fn new(
114+
children: Arc<dyn LayoutChildren>,
115+
dtypes: Vec<DType>,
116+
names: Vec<Arc<str>>,
117+
segment_source: Arc<dyn SegmentSource>,
118+
) -> Self {
112119
let nchildren = children.nchildren();
113120
let cache = (0..nchildren).map(|_| OnceCell::new()).collect();
114121
Self {
115122
children,
123+
dtypes,
124+
names,
116125
segment_source,
117126
cache,
118127
}
119128
}
120129

121-
pub fn get(
122-
&self,
123-
idx: usize,
124-
dtype: &DType,
125-
name: &Arc<str>,
126-
) -> VortexResult<&LayoutReaderRef> {
130+
pub fn get(&self, idx: usize) -> VortexResult<&LayoutReaderRef> {
127131
if idx >= self.cache.len() {
128132
vortex_bail!("Child index out of bounds: {} of {}", idx, self.cache.len());
129133
}
130134

131135
self.cache[idx].get_or_try_init(|| {
136+
let dtype = &self.dtypes[idx];
132137
let child = self.children.child(idx, dtype)?;
133-
child.new_reader(name.clone(), self.segment_source.clone())
138+
child.new_reader(Arc::clone(&self.names[idx]), self.segment_source.clone())
134139
})
135140
}
136141
}

0 commit comments

Comments
 (0)