Skip to content

Commit 4ea291f

Browse files
committed
only register_splits for active row_range in partitions
Signed-off-by: Andrew Duffy <[email protected]>
1 parent bd80602 commit 4ea291f

File tree

11 files changed

+60
-35
lines changed

11 files changed

+60
-35
lines changed

bench-vortex/src/realnest/gharchive.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,10 @@ impl Benchmark for GithubArchive {
230230
fn data_url(&self) -> &Url {
231231
&self.data_url
232232
}
233+
234+
fn expected_row_counts(&self) -> Option<&[usize]> {
235+
Some(&[1, 2, 100, 10, 82468])
236+
}
233237
}
234238

235239
pub async fn register_table(

vortex-file/src/file.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,9 @@ impl VortexFile {
137137
}
138138

139139
pub fn splits(&self) -> VortexResult<Vec<Range<u64>>> {
140+
let reader = self.layout_reader()?;
140141
Ok(SplitBy::Layout
141-
.splits(self.layout_reader()?.as_ref(), &[FieldMask::All])?
142+
.splits(reader.as_ref(), &(0..reader.row_count()), &[FieldMask::All])?
142143
.into_iter()
143144
.tuple_windows()
144145
.map(|(start, end)| start..end)

vortex-layout/src/layouts/chunked/reader.rs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use std::sync::Arc;
77

88
use futures::future::BoxFuture;
99
use futures::stream::FuturesOrdered;
10-
use futures::{FutureExt, StreamExt, TryStreamExt};
10+
use futures::{FutureExt, TryStreamExt};
1111
use vortex_array::arrays::ChunkedArray;
1212
use vortex_array::{ArrayRef, MaskFuture};
1313
use vortex_dtype::{DType, FieldMask};
@@ -149,16 +149,23 @@ impl LayoutReader for ChunkedReader {
149149
fn register_splits(
150150
&self,
151151
field_mask: &[FieldMask],
152-
row_offset: u64,
152+
row_range: &Range<u64>,
153153
splits: &mut BTreeSet<u64>,
154154
) -> VortexResult<()> {
155-
let mut offset = row_offset;
155+
let mut offset = row_range.start;
156156
for i in 0..self.layout.nchildren() {
157+
// Bail early if the row range only overlaps with a subset of the chunks
158+
if offset >= row_range.end {
159+
break;
160+
}
161+
157162
let child = self.chunk_reader(i)?;
158-
child.register_splits(field_mask, offset, splits)?;
159-
offset += self.layout.child(i)?.row_count();
163+
let child_range = offset..offset + child.row_count();
164+
child.register_splits(field_mask, &child_range, splits)?;
165+
offset = child_range.end;
160166
splits.insert(offset);
161167
}
168+
162169
Ok(())
163170
}
164171

vortex-layout/src/layouts/dict/reader.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,10 @@ impl LayoutReader for DictReader {
111111
fn register_splits(
112112
&self,
113113
field_mask: &[FieldMask],
114-
row_offset: u64,
114+
row_range: &Range<u64>,
115115
splits: &mut BTreeSet<u64>,
116116
) -> VortexResult<()> {
117-
self.codes.register_splits(field_mask, row_offset, splits)
117+
self.codes.register_splits(field_mask, row_range, splits)
118118
}
119119

120120
fn pruning_evaluation(

vortex-layout/src/layouts/flat/reader.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,10 @@ impl LayoutReader for FlatReader {
8484
fn register_splits(
8585
&self,
8686
_field_mask: &[FieldMask],
87-
row_offset: u64,
87+
row_range: &Range<u64>,
8888
splits: &mut BTreeSet<u64>,
8989
) -> VortexResult<()> {
90-
splits.insert(row_offset + self.layout.row_count());
90+
splits.insert(row_range.end);
9191
Ok(())
9292
}
9393

vortex-layout/src/layouts/row_idx/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,10 @@ impl LayoutReader for RowIdxLayoutReader {
136136
fn register_splits(
137137
&self,
138138
field_mask: &[FieldMask],
139-
row_offset: u64,
139+
row_range: &Range<u64>,
140140
splits: &mut BTreeSet<u64>,
141141
) -> VortexResult<()> {
142-
self.child.register_splits(field_mask, row_offset, splits)
142+
self.child.register_splits(field_mask, row_range, splits)
143143
}
144144

145145
fn pruning_evaluation(

vortex-layout/src/layouts/struct_/reader.rs

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ use std::sync::Arc;
88
use futures::try_join;
99
use itertools::Itertools;
1010
use vortex_array::arrays::StructArray;
11-
use vortex_array::stats::Precision;
1211
use vortex_array::vtable::ValidityHelper;
1312
use vortex_array::{ArrayRef, IntoArray, MaskFuture, ToCanonical};
1413
use vortex_dtype::{DType, FieldMask, FieldName, Nullability, StructFields};
@@ -116,11 +115,6 @@ impl StructReader {
116115
idx
117116
};
118117

119-
let field_dtype = self
120-
.struct_fields()
121-
.field_by_index(idx)
122-
.ok_or_else(|| vortex_err!("Missing field {idx}"))?;
123-
let name = &self.struct_fields().names()[idx];
124118
self.lazy_children.get(child_index)
125119
}
126120

@@ -210,15 +204,15 @@ impl LayoutReader for StructReader {
210204
fn register_splits(
211205
&self,
212206
field_mask: &[FieldMask],
213-
row_offset: u64,
207+
row_range: &Range<u64>,
214208
splits: &mut BTreeSet<u64>,
215209
) -> VortexResult<()> {
216210
// In the case of an empty struct, we need to register the end split.
217-
splits.insert(row_offset + self.layout.row_count);
211+
splits.insert(row_range.end);
218212

219213
self.layout.matching_fields(field_mask, |mask, idx| {
220214
self.field_reader_by_index(idx)?
221-
.register_splits(&[mask], row_offset, splits)
215+
.register_splits(&[mask], row_range, splits)
222216
})
223217
}
224218

vortex-layout/src/layouts/zoned/reader.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,11 +194,11 @@ impl LayoutReader for ZonedReader {
194194
fn register_splits(
195195
&self,
196196
field_mask: &[FieldMask],
197-
row_offset: u64,
197+
row_range: &Range<u64>,
198198
splits: &mut BTreeSet<u64>,
199199
) -> VortexResult<()> {
200200
self.data_child()?
201-
.register_splits(field_mask, row_offset, splits)
201+
.register_splits(field_mask, row_range, splits)
202202
}
203203

204204
fn pruning_evaluation(

vortex-layout/src/reader.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ pub trait LayoutReader: 'static + Send + Sync {
3636
fn register_splits(
3737
&self,
3838
field_mask: &[FieldMask],
39-
row_offset: u64,
39+
row_range: &Range<u64>,
4040
splits: &mut BTreeSet<u64>,
4141
) -> VortexResult<()>;
4242

@@ -104,7 +104,6 @@ pub struct LazyReaderChildren {
104104
dtypes: Vec<DType>,
105105
names: Vec<Arc<str>>,
106106
segment_source: Arc<dyn SegmentSource>,
107-
108107
// TODO(ngates): we may want a hash map of some sort here?
109108
cache: Vec<OnceCell<LayoutReaderRef>>,
110109
}

vortex-scan/src/scan_builder.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,15 @@ impl<A: 'static + Send> ScanBuilder<A> {
237237
if let Some(ranges) = attempt_split_ranges(&self.selection, self.row_range.as_ref()) {
238238
Splits::Ranges(ranges)
239239
} else {
240-
Splits::Natural(self.split_by.splits(layout_reader.as_ref(), &field_mask)?)
240+
let split_range = self
241+
.row_range
242+
.clone()
243+
.unwrap_or_else(|| 0..layout_reader.row_count());
244+
Splits::Natural(self.split_by.splits(
245+
layout_reader.as_ref(),
246+
&split_range,
247+
&field_mask,
248+
)?)
241249
};
242250

243251
Ok(RepeatedScan::new(

0 commit comments

Comments
 (0)