Skip to content

Commit da5462b

Browse files
robert3005AdamGS
authored andcommitted
NaN cannot be a min/max value of a primitive array (#3104)
In a followup we will add a stat for nan_count of the array fixes #1375
1 parent 60bdb22 commit da5462b

File tree

3 files changed

+54
-5
lines changed

3 files changed

+54
-5
lines changed

encodings/alp/src/alp/compute/compare.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ where
8080
Operator::Gt | Operator::Gte => {
8181
// Per IEEE 754 totalOrder semantics the ordering is -Nan < -Inf < Inf < Nan.
8282
// All values in the encoded array are definitely finite
83-
let is_not_finite = value.is_infinite() || NativePType::is_nan(value);
83+
let is_not_finite = NativePType::is_infinite(value) || NativePType::is_nan(value);
8484
if is_not_finite {
8585
Ok(Some(
8686
ConstantArray::new(value.is_sign_negative(), alp.len()).into_array(),
@@ -99,7 +99,7 @@ where
9999
Operator::Lt | Operator::Lte => {
100100
// Per IEEE 754 totalOrder semantics the ordering is -Nan < -Inf < Inf < Nan.
101101
// All values in the encoded array are definitely finite
102-
let is_not_finite = value.is_infinite() || NativePType::is_nan(value);
102+
let is_not_finite = NativePType::is_infinite(value) || NativePType::is_nan(value);
103103
if is_not_finite {
104104
Ok(Some(
105105
ConstantArray::new(value.is_sign_positive(), alp.len()).into_array(),

vortex-array/src/arrays/primitive/compute/min_max.rs

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,14 @@ where
3838

3939
fn compute_min_max<'a, T>(iter: impl Iterator<Item = &'a T>, dtype: &DType) -> Option<MinMaxResult>
4040
where
41-
T: Into<ScalarValue> + NativePType + Copy,
41+
T: Into<ScalarValue> + NativePType,
4242
{
43-
// this `compare` function provides a total ordering (even for NaN values)
44-
match iter.minmax_by(|a, b| a.total_compare(**b)) {
43+
// `total_compare` function provides a total ordering (even for NaN values).
44+
// However, we exclude NaNs from min max as they're not useful for any purpose where min/max would be used
45+
match iter
46+
.filter(|v| !v.is_nan())
47+
.minmax_by(|a, b| a.total_compare(**b))
48+
{
4549
itertools::MinMaxResult::NoElements => None,
4650
itertools::MinMaxResult::OneElement(&x) => {
4751
let scalar = Scalar::new(dtype.clone(), x.into());
@@ -56,3 +60,34 @@ where
5660
}),
5761
}
5862
}
63+
64+
#[cfg(test)]
65+
mod tests {
66+
use vortex_buffer::buffer;
67+
68+
use crate::arrays::PrimitiveArray;
69+
use crate::compute::min_max;
70+
use crate::validity::Validity;
71+
72+
#[test]
73+
fn min_max_nan() {
74+
let array = PrimitiveArray::new(
75+
buffer![f32::NAN, -f32::NAN, -1.0, 1.0],
76+
Validity::NonNullable,
77+
);
78+
let min_max = min_max(&array).unwrap().unwrap();
79+
assert_eq!(f32::try_from(min_max.min).unwrap(), -1.0);
80+
assert_eq!(f32::try_from(min_max.max).unwrap(), 1.0);
81+
}
82+
83+
#[test]
84+
fn min_max_inf() {
85+
let array = PrimitiveArray::new(
86+
buffer![f32::INFINITY, f32::NEG_INFINITY, -1.0, 1.0],
87+
Validity::NonNullable,
88+
);
89+
let min_max = min_max(&array).unwrap().unwrap();
90+
assert_eq!(f32::try_from(min_max.min).unwrap(), f32::NEG_INFINITY);
91+
assert_eq!(f32::try_from(min_max.max).unwrap(), f32::INFINITY);
92+
}
93+
}

vortex-dtype/src/ptype.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ pub trait NativePType:
6868
/// For integer types, this is always `false`
6969
fn is_nan(self) -> bool;
7070

71+
/// Whether this instance (`self`) is Infinite
72+
/// For integer types, this is always `false`
73+
fn is_infinite(self) -> bool;
74+
7175
/// Compare another instance of this type to `self`, providing a total ordering
7276
fn total_compare(self, other: Self) -> Ordering;
7377

@@ -109,6 +113,11 @@ macro_rules! native_ptype {
109113
false
110114
}
111115

116+
#[inline]
117+
fn is_infinite(self) -> bool {
118+
false
119+
}
120+
112121
#[inline]
113122
fn total_compare(self, other: Self) -> Ordering {
114123
self.cmp(&other)
@@ -132,6 +141,11 @@ macro_rules! native_float_ptype {
132141
<$T>::is_nan(self)
133142
}
134143

144+
#[inline]
145+
fn is_infinite(self) -> bool {
146+
<$T>::is_infinite(self)
147+
}
148+
135149
#[inline]
136150
fn total_compare(self, other: Self) -> Ordering {
137151
self.total_cmp(&other)

0 commit comments

Comments
 (0)