Skip to content

Commit bf52a05

Browse files
authored
Add more non copying stats operations; replace stats replace with inherit_from (#4279)
Signed-off-by: Robert Kruszewski <[email protected]>
1 parent 1a0ec03 commit bf52a05

File tree

10 files changed

+49
-53
lines changed

10 files changed

+49
-53
lines changed

vortex-array/src/array/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use crate::arrays::{
2222
use crate::builders::ArrayBuilder;
2323
use crate::compute::{ComputeFn, Cost, InvocationArgs, IsConstantOpts, Output, is_constant_opts};
2424
use crate::serde::ArrayChildren;
25-
use crate::stats::{Precision, Stat, StatsSetRef};
25+
use crate::stats::{Precision, Stat, StatsProviderExt, StatsSetRef};
2626
use crate::vtable::{
2727
ArrayVTable, CanonicalVTable, ComputeVTable, OperationsVTable, SerdeVTable, VTable,
2828
ValidityVTable, VisitorVTable,
@@ -516,7 +516,7 @@ impl<V: VTable> Array for ArrayAdapter<V> {
516516
canonical
517517
.as_ref()
518518
.statistics()
519-
.replace(self.statistics().to_owned());
519+
.inherit_from(self.statistics());
520520
Ok(canonical)
521521
}
522522

vortex-array/src/compute/is_constant.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use vortex_scalar::Scalar;
1212
use crate::Array;
1313
use crate::arrays::{ConstantVTable, NullVTable};
1414
use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Options, Output};
15-
use crate::stats::{Precision, Stat, StatsProvider};
15+
use crate::stats::{Precision, Stat, StatsProvider, StatsProviderExt};
1616
use crate::vtable::VTable;
1717

1818
static IS_CONSTANT_FN: LazyLock<ComputeFn> = LazyLock::new(|| {

vortex-array/src/compute/is_sorted.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use vortex_scalar::Scalar;
1212
use crate::Array;
1313
use crate::arrays::{ConstantVTable, NullVTable};
1414
use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Options, Output};
15-
use crate::stats::{Precision, Stat};
15+
use crate::stats::{Precision, Stat, StatsProviderExt};
1616
use crate::vtable::VTable;
1717

1818
static IS_SORTED_FN: LazyLock<ComputeFn> = LazyLock::new(|| {

vortex-array/src/compute/nan_count.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use vortex_scalar::{Scalar, ScalarValue};
1010

1111
use crate::Array;
1212
use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Output, UnaryArgs};
13-
use crate::stats::{Precision, Stat};
13+
use crate::stats::{Precision, Stat, StatsProviderExt};
1414
use crate::vtable::VTable;
1515

1616
static NAN_COUNT_FN: LazyLock<ComputeFn> = LazyLock::new(|| {

vortex-array/src/compute/take.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use vortex_scalar::Scalar;
1010

1111
use crate::arrays::{ConstantArray, ConstantVTable};
1212
use crate::compute::{ComputeFn, ComputeFnVTable, InvocationArgs, Kernel, Output};
13-
use crate::stats::{Precision, Stat, StatsSet};
13+
use crate::stats::{Precision, Stat, StatsProviderExt, StatsSet};
1414
use crate::vtable::VTable;
1515
use crate::{Array, ArrayRef, Canonical, IntoArray};
1616

vortex-array/src/serde.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ impl WriteFlatBuffer for ArrayNodeFlatBuffer<'_> {
208208
let children = Some(fbb.create_vector(children));
209209

210210
let buffers = Some(fbb.create_vector_from_iter((0..nbuffers).map(|i| i + self.buffer_idx)));
211-
let stats = Some(self.array.statistics().to_owned().write_flatbuffer(fbb));
211+
let stats = Some(self.array.statistics().write_flatbuffer(fbb));
212212

213213
fba::ArrayNode::create(
214214
fbb,

vortex-array/src/stats/array.rs

Lines changed: 19 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use parking_lot::RwLock;
99
use vortex_error::{VortexError, VortexResult, vortex_panic};
1010
use vortex_scalar::{Scalar, ScalarValue};
1111

12-
use super::{Precision, Stat, StatType, StatsProvider, StatsSet, StatsSetIntoIter};
12+
use super::{Precision, Stat, StatsProvider, StatsSet, StatsSetIntoIter, TypedStatsSetRef};
1313
use crate::Array;
1414
use crate::compute::{
1515
MinMaxResult, is_constant, is_sorted, is_strict_sorted, min_max, nan_count, sum,
@@ -75,19 +75,32 @@ impl StatsSetRef<'_> {
7575
}
7676

7777
pub fn inherit_from(&self, stats: StatsSetRef<'_>) {
78-
stats.with_iter(|iter| self.inherit(iter));
78+
// Only inherit if the underlying stats are different
79+
if !Arc::ptr_eq(&self.array_stats.inner, &stats.array_stats.inner) {
80+
stats.with_iter(|iter| self.inherit(iter));
81+
}
7982
}
8083

8184
pub fn inherit<'a>(&self, iter: impl Iterator<Item = &'a (Stat, Precision<ScalarValue>)>) {
82-
// TODO(ngates): depending on statistic, this should choose the more precise one
8385
let mut guard = self.array_stats.inner.write();
8486
for (stat, value) in iter {
85-
guard.set(*stat, value.clone());
87+
if !value.is_exact() {
88+
if !guard.get(*stat).is_some_and(|v| v.is_exact()) {
89+
guard.set(*stat, value.clone());
90+
}
91+
} else {
92+
guard.set(*stat, value.clone());
93+
}
8694
}
8795
}
8896

89-
pub fn replace(&self, stats: StatsSet) {
90-
*self.array_stats.inner.write() = stats;
97+
pub fn with_typed_stats_set<U, F: FnOnce(TypedStatsSetRef) -> U>(&self, apply: F) -> U {
98+
apply(
99+
self.array_stats
100+
.inner
101+
.read()
102+
.as_typed_ref(self.dyn_array_ref.dtype()),
103+
)
91104
}
92105

93106
pub fn to_owned(&self) -> StatsSet {
@@ -165,32 +178,6 @@ impl StatsSetRef<'_> {
165178
}
166179

167180
impl StatsSetRef<'_> {
168-
pub fn get_as<U: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
169-
&self,
170-
stat: Stat,
171-
) -> Option<Precision<U>> {
172-
self.get(stat).map(|v| {
173-
v.map(|v| {
174-
U::try_from(&v).unwrap_or_else(|err| {
175-
vortex_panic!(
176-
err,
177-
"Failed to get stat {} as {}",
178-
stat,
179-
std::any::type_name::<U>()
180-
)
181-
})
182-
})
183-
})
184-
}
185-
186-
pub fn get_as_bound<S, U>(&self) -> Option<S::Bound>
187-
where
188-
S: StatType<U>,
189-
U: for<'a> TryFrom<&'a Scalar, Error = VortexError>,
190-
{
191-
self.get_as::<U>(S::STAT).map(|v| v.bound::<S>())
192-
}
193-
194181
pub fn compute_as<U: for<'a> TryFrom<&'a Scalar, Error = VortexError>>(
195182
&self,
196183
stat: Stat,

vortex-array/src/stats/flatbuffers.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,19 @@ use vortex_error::{VortexError, vortex_bail};
77
use vortex_flatbuffers::{ReadFlatBuffer, WriteFlatBuffer, array as fba};
88
use vortex_scalar::ScalarValue;
99

10-
use crate::stats::{Precision, Stat, StatsSet};
10+
use crate::stats::{Precision, Stat, StatsSet, StatsSetRef};
11+
12+
impl WriteFlatBuffer for StatsSetRef<'_> {
13+
type Target<'t> = fba::ArrayStats<'t>;
14+
15+
/// All statistics written must be exact
16+
fn write_flatbuffer<'fb>(
17+
&self,
18+
fbb: &mut FlatBufferBuilder<'fb>,
19+
) -> WIPOffset<Self::Target<'fb>> {
20+
self.with_typed_stats_set(|stats_set| stats_set.values.write_flatbuffer(fbb))
21+
}
22+
}
1123

1224
impl WriteFlatBuffer for StatsSet {
1325
type Target<'t> = fba::ArrayStats<'t>;

vortex-file/src/pruning.rs

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,11 @@ use std::sync::Arc;
55

66
use vortex_array::ArrayRef;
77
use vortex_array::arrays::{ConstantArray, StructArray};
8-
use vortex_array::stats::{Stat, StatsSet};
8+
use vortex_array::stats::{Stat, StatsProvider, StatsSet};
99
use vortex_array::validity::Validity;
1010
use vortex_dtype::{Field, FieldName, FieldNames, FieldPath, StructFields};
1111
use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
1212
use vortex_expr::pruning::field_path_stat_field_name;
13-
use vortex_scalar::Scalar;
1413
use vortex_utils::aliases::hash_map::HashMap;
1514
use vortex_utils::aliases::hash_set::HashSet;
1615

@@ -43,19 +42,17 @@ pub fn extract_relevant_file_stats_as_struct_row(
4342
let Some(stat_set) = stats_sets.get(field_idx) else {
4443
vortex_bail!("missing stat field {} from stats set", field)
4544
};
45+
let typed_stats = stat_set.as_typed_ref(&field_dtype);
4646

4747
for stat in stats {
48-
let Some(stat_value) = stat_set.get(*stat).and_then(|p| p.as_exact()) else {
49-
vortex_bail!("missing stat {}, {} from stats set", field, stat)
50-
};
5148
if stat == &Stat::Max || stat == &Stat::Min || stat == &Stat::NaNCount {
52-
if let Some(stat_dtype) = stat.dtype(&field_dtype) {
53-
columns.push((
54-
field_path_stat_field_name(field_path, *stat),
55-
ConstantArray::new(Scalar::new(stat_dtype, stat_value.clone()), 1)
56-
.to_array(),
57-
));
58-
}
49+
let Some(stat_value) = typed_stats.get(*stat).and_then(|p| p.as_exact()) else {
50+
vortex_bail!("missing stat {}, {} from stats set", field, stat)
51+
};
52+
columns.push((
53+
field_path_stat_field_name(field_path, *stat),
54+
ConstantArray::new(stat_value, 1).to_array(),
55+
));
5956
} else {
6057
todo!("unsupported file prune stat {stat}")
6158
}

vortex-layout/src/layouts/flat/writer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ mod tests {
140140
use futures::stream;
141141
use vortex_array::arrays::{BoolArray, PrimitiveArray, StructArray};
142142
use vortex_array::builders::{ArrayBuilder, VarBinViewBuilder};
143-
use vortex_array::stats::{Precision, Stat};
143+
use vortex_array::stats::{Precision, Stat, StatsProviderExt};
144144
use vortex_array::validity::Validity;
145145
use vortex_array::{Array, ArrayContext, ArrayRef, IntoArray, ToCanonical};
146146
use vortex_buffer::buffer;

0 commit comments

Comments
 (0)