Skip to content

Commit f97dfc5

Browse files
authored
Fix min-max stats computation (#2237)
issue introduced in #2213, PR got a bit out of control as I put some changes I thought made sense, mostly making the `Statistics` function names more explicit now that `Array` implements it directly.
1 parent 4781ca5 commit f97dfc5

File tree

29 files changed

+148
-168
lines changed

29 files changed

+148
-168
lines changed

encodings/bytebool/src/stats.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ impl StatisticsVTable<ByteBoolArray> for ByteBoolEncoding {
1515
let bools = array.as_ref().clone().into_bool()?;
1616
Ok(bools
1717
.statistics()
18-
.compute(stat)
18+
.compute_stat(stat)
1919
.map(|value| StatsSet::of(stat, Precision::exact(value)))
2020
.unwrap_or_default())
2121
}
@@ -88,8 +88,8 @@ mod tests {
8888
assert!(!bool_arr.statistics().compute_is_strict_sorted().unwrap());
8989
assert!(bool_arr.statistics().compute_is_sorted().unwrap());
9090
assert!(bool_arr.statistics().compute_is_constant().unwrap());
91-
assert!(bool_arr.statistics().compute(Stat::Min).is_none());
92-
assert!(bool_arr.statistics().compute(Stat::Max).is_none());
91+
assert!(bool_arr.statistics().compute_stat(Stat::Min).is_none());
92+
assert!(bool_arr.statistics().compute_stat(Stat::Max).is_none());
9393
assert_eq!(bool_arr.statistics().compute_run_count().unwrap(), 1);
9494
assert_eq!(bool_arr.statistics().compute_true_count().unwrap(), 0);
9595
}

encodings/dict/src/stats.rs

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use vortex_array::stats::{Precision, Stat, StatsSet};
1+
use vortex_array::stats::{Precision, Stat, Statistics, StatsSet};
22
use vortex_array::vtable::StatisticsVTable;
33
use vortex_error::VortexResult;
44

@@ -10,27 +10,27 @@ impl StatisticsVTable<DictArray> for DictEncoding {
1010

1111
match stat {
1212
Stat::RunCount => {
13-
if let Some(rc) = array.codes().statistics().compute(Stat::RunCount) {
13+
if let Some(rc) = array.codes().compute_stat(Stat::RunCount) {
1414
stats.set(Stat::RunCount, Precision::exact(rc));
1515
}
1616
}
1717
Stat::Min => {
18-
if let Some(min) = array.values().statistics().compute(Stat::Min) {
18+
if let Some(min) = array.values().compute_stat(Stat::Min) {
1919
stats.set(Stat::Min, Precision::exact(min));
2020
}
2121
}
2222
Stat::Max => {
23-
if let Some(max) = array.values().statistics().compute(Stat::Max) {
23+
if let Some(max) = array.values().compute_stat(Stat::Max) {
2424
stats.set(Stat::Max, Precision::exact(max));
2525
}
2626
}
2727
Stat::IsConstant => {
28-
if let Some(is_constant) = array.codes().statistics().compute(Stat::IsConstant) {
28+
if let Some(is_constant) = array.codes().compute_stat(Stat::IsConstant) {
2929
stats.set(Stat::IsConstant, Precision::exact(is_constant));
3030
}
3131
}
3232
Stat::NullCount => {
33-
if let Some(null_count) = array.codes().statistics().compute(Stat::NullCount) {
33+
if let Some(null_count) = array.codes().compute_stat(Stat::NullCount) {
3434
stats.set(Stat::NullCount, Precision::exact(null_count));
3535
}
3636
}
@@ -42,14 +42,12 @@ impl StatisticsVTable<DictArray> for DictEncoding {
4242
.compute_is_sorted()
4343
.unwrap_or(false)
4444
{
45-
if let Some(codes_are_sorted) =
46-
array.codes().statistics().compute(Stat::IsSorted)
47-
{
45+
if let Some(codes_are_sorted) = array.codes().compute_stat(Stat::IsSorted) {
4846
stats.set(Stat::IsSorted, Precision::exact(codes_are_sorted));
4947
}
5048

5149
if let Some(codes_are_strict_sorted) =
52-
array.codes().statistics().compute(Stat::IsStrictSorted)
50+
array.codes().compute_stat(Stat::IsStrictSorted)
5351
{
5452
stats.set(
5553
Stat::IsStrictSorted,

encodings/fastlanes/src/for/compress.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use num_traits::{PrimInt, WrappingAdd, WrappingSub};
22
use vortex_array::array::PrimitiveArray;
3-
use vortex_array::stats::Stat;
3+
use vortex_array::stats::{Stat, Statistics as _};
44
use vortex_array::variants::PrimitiveArrayTrait;
55
use vortex_array::{IntoArray, IntoArrayVariant};
66
use vortex_buffer::{Buffer, BufferMut};
@@ -12,8 +12,7 @@ use crate::FoRArray;
1212

1313
pub fn for_compress(array: PrimitiveArray) -> VortexResult<FoRArray> {
1414
let min = array
15-
.statistics()
16-
.compute(Stat::Min)
15+
.compute_stat(Stat::Min)
1716
.ok_or_else(|| vortex_err!("Min stat not found"))?;
1817

1918
let dtype = array.dtype().clone();
@@ -91,7 +90,7 @@ mod test {
9190
#[test]
9291
fn test_zeros() {
9392
let array = PrimitiveArray::new(buffer![0i32; 100], Validity::NonNullable);
94-
assert!(array.statistics().to_set().into_iter().next().is_none());
93+
assert!(array.statistics().stats_set().into_iter().next().is_none());
9594

9695
let compressed = for_compress(array.clone()).unwrap();
9796
assert_eq!(compressed.dtype(), array.dtype());

encodings/runend/src/statistics.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::cmp;
22

33
use arrow_buffer::BooleanBuffer;
44
use itertools::Itertools;
5-
use vortex_array::stats::{Precision, Stat, StatsSet};
5+
use vortex_array::stats::{Precision, Stat, Statistics, StatsSet};
66
use vortex_array::variants::PrimitiveArrayTrait;
77
use vortex_array::vtable::StatisticsVTable;
88
use vortex_array::IntoArrayVariant as _;
@@ -16,7 +16,7 @@ use crate::{RunEndArray, RunEndEncoding};
1616
impl StatisticsVTable<RunEndArray> for RunEndEncoding {
1717
fn compute_statistics(&self, array: &RunEndArray, stat: Stat) -> VortexResult<StatsSet> {
1818
let maybe_stat = match stat {
19-
Stat::Min | Stat::Max => array.values().statistics().compute(stat),
19+
Stat::Min | Stat::Max => array.values().compute_stat(stat),
2020
Stat::IsSorted => Some(ScalarValue::from(
2121
array
2222
.values()

encodings/sparse/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::fmt::{Debug, Display};
33
use vortex_array::array::BooleanBufferBuilder;
44
use vortex_array::compute::{scalar_at, sub_scalar};
55
use vortex_array::patches::{Patches, PatchesMetadata};
6-
use vortex_array::stats::{Stat, StatsSet};
6+
use vortex_array::stats::{Stat, Statistics, StatsSet};
77
use vortex_array::variants::PrimitiveArrayTrait;
88
use vortex_array::visitor::ArrayVisitor;
99
use vortex_array::vtable::{StatisticsVTable, ValidateVTable, ValidityVTable, VisitorVTable};
@@ -170,7 +170,7 @@ impl VisitorVTable<SparseArray> for SparseEncoding {
170170
impl StatisticsVTable<SparseArray> for SparseEncoding {
171171
fn compute_statistics(&self, array: &SparseArray, stat: Stat) -> VortexResult<StatsSet> {
172172
let values = array.patches().into_values();
173-
let stats = values.statistics().compute_all(&[stat])?;
173+
let stats = values.compute_all(&[stat])?;
174174
if array.len() == values.len() {
175175
return Ok(stats);
176176
}

encodings/zigzag/src/array.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use vortex_array::array::PrimitiveArray;
2-
use vortex_array::stats::{Precision, Stat, StatsSet};
2+
use vortex_array::stats::{Precision, Stat, Statistics, StatsSet};
33
use vortex_array::variants::PrimitiveArrayTrait;
44
use vortex_array::visitor::ArrayVisitor;
55
use vortex_array::vtable::{
@@ -98,7 +98,7 @@ impl StatisticsVTable<ZigZagArray> for ZigZagEncoding {
9898

9999
// these stats are the same for array and array.encoded()
100100
if matches!(stat, Stat::IsConstant | Stat::NullCount) {
101-
if let Some(val) = array.encoded().statistics().compute(stat) {
101+
if let Some(val) = array.encoded().compute_stat(stat) {
102102
stats.set(stat, Precision::exact(val));
103103
}
104104
} else if matches!(stat, Stat::Min | Stat::Max) {

vortex-array/src/array/bool/stats.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ mod test {
171171
use vortex_dtype::Nullability;
172172

173173
use crate::array::BoolArray;
174-
use crate::stats::Stat;
174+
use crate::stats::{Stat, Statistics};
175175

176176
#[test]
177177
fn bool_stats() {
@@ -275,8 +275,8 @@ mod test {
275275
assert!(!bool_arr.statistics().compute_is_strict_sorted().unwrap());
276276
assert!(bool_arr.statistics().compute_is_sorted().unwrap());
277277
assert!(bool_arr.statistics().compute_is_constant().unwrap());
278-
assert!(bool_arr.statistics().compute(Stat::Min).is_none());
279-
assert!(bool_arr.statistics().compute(Stat::Max).is_none());
278+
assert!(bool_arr.compute_stat(Stat::Min).is_none());
279+
assert!(bool_arr.compute_stat(Stat::Max).is_none());
280280
assert_eq!(bool_arr.statistics().compute_run_count().unwrap(), 1);
281281
assert_eq!(bool_arr.statistics().compute_true_count().unwrap(), 0);
282282
assert_eq!(bool_arr.statistics().compute_null_count().unwrap(), 5);

vortex-array/src/array/chunked/stats.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ impl StatisticsVTable<ChunkedArray> for ChunkedEncoding {
1919
s.compute_all(&[stat, Stat::Min, Stat::Max]).ok()
2020
}
2121
_ => s
22-
.compute(stat)
22+
.compute_stat(stat)
2323
.map(|s| StatsSet::of(stat, Precision::exact(s))),
2424
}
2525
.unwrap_or_default()

vortex-array/src/array/constant/canonical.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,10 +161,10 @@ mod tests {
161161
fn test_canonicalize_propagates_stats() {
162162
let scalar = Scalar::bool(true, Nullability::NonNullable);
163163
let const_array = ConstantArray::new(scalar.clone(), 4).into_array();
164-
let stats = const_array.statistics().to_set();
164+
let stats = const_array.statistics().stats_set();
165165

166166
let canonical = const_array.into_canonical().unwrap();
167-
let canonical_stats = canonical.as_ref().statistics().to_set();
167+
let canonical_stats = canonical.as_ref().statistics().stats_set();
168168

169169
let reference = StatsSet::constant(scalar, 4);
170170
for stat in all::<Stat>() {

vortex-array/src/array/extension/compute/mod.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
mod compare;
22
mod to_arrow;
33

4+
use std::sync::Arc;
5+
6+
use vortex_dtype::Nullability;
47
use vortex_error::VortexResult;
58
use vortex_scalar::Scalar;
69

@@ -77,10 +80,11 @@ impl TakeFn<ExtensionArray> for ExtensionEncoding {
7780

7881
impl MinMaxFn<ExtensionArray> for ExtensionEncoding {
7982
fn min_max(&self, array: &ExtensionArray) -> VortexResult<Option<MinMaxResult>> {
83+
let dtype = Arc::new(array.ext_dtype().with_nullability(Nullability::NonNullable));
8084
Ok(
8185
min_max(array.storage())?.map(|MinMaxResult { min, max }| MinMaxResult {
82-
min: Scalar::extension(array.ext_dtype().clone(), min),
83-
max: Scalar::extension(array.ext_dtype().clone(), max),
86+
min: Scalar::extension(dtype.clone(), min),
87+
max: Scalar::extension(dtype, max),
8488
}),
8589
)
8690
}

0 commit comments

Comments
 (0)