Skip to content

Commit e570ee4

Browse files
authored
Exact layout reader row count (#5088)
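Inexact row counts (`Precision<u64>`) were an idea from a while back that was never used. This commit makes `LayoutReader::row_count` return an exact `u64` again and removes the now-unreachable "row count is not exact" error path from `SplitBy::RowCount`. Signed-off-by: Nicholas Gates <[email protected]>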
1 parent 4980e82 commit e570ee4

File tree

9 files changed

+15
-31
lines changed

9 files changed

+15
-31
lines changed

vortex-layout/src/layout.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,6 @@ pub enum LayoutChildType {
7777
/// A layout child that represents a single field of data.
7878
/// Contains the field name of the child.
7979
Field(FieldName),
80-
// A layout child that contains a subset of the fields of the parent layout.
81-
// Contains a mask over the fields of the parent layout.
82-
// TODO(ngates): FieldMask API needs fixing before we enable this. We also don't yet have a
83-
// use-case for this.
84-
// Mask(Vec<FieldMask>),
8580
}
8681

8782
impl LayoutChildType {

vortex-layout/src/layouts/chunked/reader.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ use futures::future::BoxFuture;
99
use futures::stream::FuturesOrdered;
1010
use futures::{FutureExt, TryStreamExt};
1111
use vortex_array::arrays::ChunkedArray;
12-
use vortex_array::stats::Precision;
1312
use vortex_array::{ArrayRef, MaskFuture};
1413
use vortex_dtype::{DType, FieldMask};
1514
use vortex_error::{VortexExpect, VortexResult, vortex_panic};
@@ -142,8 +141,8 @@ impl LayoutReader for ChunkedReader {
142141
self.layout.dtype()
143142
}
144143

145-
fn row_count(&self) -> Precision<u64> {
146-
Precision::Exact(self.layout.row_count())
144+
fn row_count(&self) -> u64 {
145+
self.layout.row_count()
147146
}
148147

149148
fn register_splits(

vortex-layout/src/layouts/dict/reader.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ use std::sync::{Arc, OnceLock};
88
use futures::future::BoxFuture;
99
use futures::{FutureExt, TryFutureExt, try_join};
1010
use vortex_array::compute::{MinMaxResult, min_max, take};
11-
use vortex_array::stats::Precision;
1211
use vortex_array::{ArrayRef, MaskFuture};
1312
use vortex_dict::DictArray;
1413
use vortex_dtype::{DType, FieldMask};
@@ -105,8 +104,8 @@ impl LayoutReader for DictReader {
105104
self.layout.dtype()
106105
}
107106

108-
fn row_count(&self) -> Precision<u64> {
109-
Precision::Exact(self.layout.row_count())
107+
fn row_count(&self) -> u64 {
108+
self.layout.row_count()
110109
}
111110

112111
fn register_splits(

vortex-layout/src/layouts/flat/reader.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ use futures::FutureExt;
99
use futures::future::BoxFuture;
1010
use vortex_array::compute::filter;
1111
use vortex_array::serde::ArrayParts;
12-
use vortex_array::stats::Precision;
1312
use vortex_array::{Array, ArrayRef, MaskFuture};
1413
use vortex_dtype::{DType, FieldMask};
1514
use vortex_error::{VortexExpect, VortexResult, VortexUnwrap as _};
@@ -78,8 +77,8 @@ impl LayoutReader for FlatReader {
7877
self.layout.dtype()
7978
}
8079

81-
fn row_count(&self) -> Precision<u64> {
82-
Precision::Exact(self.layout.row_count())
80+
fn row_count(&self) -> u64 {
81+
self.layout.row_count()
8382
}
8483

8584
fn register_splits(

vortex-layout/src/layouts/row_idx/mod.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ pub use expr::*;
1313
use futures::FutureExt;
1414
use futures::future::BoxFuture;
1515
use vortex_array::compute::filter;
16-
use vortex_array::stats::Precision;
1716
use vortex_array::{ArrayRef, IntoArray, MaskFuture};
1817
use vortex_dtype::{DType, FieldMask, FieldName, Nullability, PType};
1918
use vortex_error::{VortexExpect, VortexResult};
@@ -130,7 +129,7 @@ impl LayoutReader for RowIdxLayoutReader {
130129
self.child.dtype()
131130
}
132131

133-
fn row_count(&self) -> Precision<u64> {
132+
fn row_count(&self) -> u64 {
134133
self.child.row_count()
135134
}
136135

vortex-layout/src/layouts/struct_/reader.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ use std::sync::Arc;
77

88
use itertools::Itertools;
99
use vortex_array::MaskFuture;
10-
use vortex_array::stats::Precision;
1110
use vortex_dtype::{DType, FieldMask, FieldName, StructFields};
1211
use vortex_error::{VortexExpect, VortexResult, vortex_err};
1312
use vortex_expr::transform::immediate_access::annotate_scope_access;
@@ -169,8 +168,8 @@ impl LayoutReader for StructReader {
169168
self.layout.dtype()
170169
}
171170

172-
fn row_count(&self) -> Precision<u64> {
173-
Precision::Exact(self.layout.row_count())
171+
fn row_count(&self) -> u64 {
172+
self.layout.row_count()
174173
}
175174

176175
fn register_splits(

vortex-layout/src/layouts/zoned/reader.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ use futures::future::{BoxFuture, Shared};
99
use futures::{FutureExt, TryFutureExt};
1010
use itertools::Itertools;
1111
use parking_lot::RwLock;
12-
use vortex_array::stats::Precision;
1312
use vortex_array::{ArrayRef, MaskFuture, ToCanonical};
1413
use vortex_buffer::BitBufferMut;
1514
use vortex_dtype::{DType, FieldMask, FieldPath, FieldPathSet};
@@ -186,8 +185,8 @@ impl LayoutReader for ZonedReader {
186185
self.layout.dtype()
187186
}
188187

189-
fn row_count(&self) -> Precision<u64> {
190-
Precision::exact(self.layout.row_count())
188+
fn row_count(&self) -> u64 {
189+
self.layout.row_count()
191190
}
192191

193192
fn register_splits(

vortex-layout/src/reader.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ use std::sync::Arc;
77

88
use futures::future::BoxFuture;
99
use once_cell::sync::OnceCell;
10-
use vortex_array::stats::Precision;
1110
use vortex_array::{ArrayRef, MaskFuture};
1211
use vortex_dtype::{DType, FieldMask};
1312
use vortex_error::{VortexResult, vortex_bail};
@@ -28,9 +27,8 @@ pub trait LayoutReader: 'static + Send + Sync {
2827
/// Returns the un-projected dtype of the layout reader.
2928
fn dtype(&self) -> &DType;
3029

31-
/// Returns the number of rows in the layout reader.
32-
/// An inexact count may be larger or smaller than the actual row count.
33-
fn row_count(&self) -> Precision<u64>;
30+
/// Returns the number of rows in the layout.
31+
fn row_count(&self) -> u64;
3432

3533
/// Register the splits of this layout reader.
3634
// TODO(ngates): this is a temporary API until we make layout readers stream based.

vortex-scan/src/split_by.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44
use std::collections::BTreeSet;
55
use std::iter::once;
66

7-
use vortex_array::stats::StatBound;
87
use vortex_dtype::FieldMask;
9-
use vortex_error::{VortexResult, vortex_err};
8+
use vortex_error::VortexResult;
109
use vortex_layout::LayoutReader;
1110

1211
/// Defines how the Vortex file is split into batches for reading.
@@ -40,9 +39,7 @@ impl SplitBy {
4039
row_splits
4140
}
4241
SplitBy::RowCount(n) => {
43-
let row_count = *layout_reader.row_count().to_exact().ok_or_else(|| {
44-
vortex_err!("Cannot split layout by row count, row count is not exact")
45-
})?;
42+
let row_count = layout_reader.row_count();
4643
(0..row_count).step_by(n).chain(once(row_count)).collect()
4744
}
4845
})

0 commit comments

Comments
 (0)