Skip to content

Commit d30d191

Browse files
authored
fix: Bool arrays with one value and rest being nulls are not constant (#1360)
I have added more test coverage now as I have previously fixed this bug
1 parent 6f542ad commit d30d191

File tree

5 files changed

+69
-11
lines changed

5 files changed

+69
-11
lines changed

encodings/roaring/src/boolean/mod.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,11 @@ impl RoaringBoolArray {
4343
)
4444
}
4545

46-
let stats =
47-
StatsSet::bools_with_true_count(bitmap.statistics().cardinality as usize, length);
46+
let stats = StatsSet::bools_with_true_and_null_count(
47+
bitmap.statistics().cardinality as usize,
48+
0,
49+
length,
50+
);
4851

4952
Ok(Self {
5053
typed: TypedArray::try_from_parts(

encodings/roaring/src/boolean/stats.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@ impl ArrayStatisticsCompute for RoaringBoolArray {
1212
stat,
1313
Stat::TrueCount | Stat::Min | Stat::Max | Stat::IsConstant
1414
) {
15-
return Ok(StatsSet::bools_with_true_count(
15+
return Ok(StatsSet::bools_with_true_and_null_count(
1616
true_count as usize,
17+
0,
1718
self.len(),
1819
));
1920
}

fuzz/fuzz_targets/array_ops.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,15 @@ fn assert_array_eq(lhs: &ArrayData, rhs: &ArrayData, step: usize) {
9898
let l = scalar_at(lhs, idx).unwrap();
9999
let r = scalar_at(rhs, idx).unwrap();
100100

101-
assert_eq!(l.is_valid(), r.is_valid());
101+
assert_eq!(
102+
l.is_valid(),
103+
r.is_valid(),
104+
"LHS validity {} != RHS validity {} at index {idx}, lhs is {} rhs is {} in step {step}",
105+
l.is_valid(),
106+
r.is_valid(),
107+
lhs.encoding().id(),
108+
rhs.encoding().id()
109+
);
102110
assert!(
103111
equal_scalar_values(l.value(), r.value()),
104112
"{l} != {r} at index {idx}, lhs is {} rhs is {} in step {step}",

vortex-array/src/array/bool/stats.rs

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ impl ArrayStatisticsCompute for BoolArray {
2020
return Ok(StatsSet::from_iter([
2121
(Stat::TrueCount, 0.into()),
2222
(Stat::NullCount, 0.into()),
23-
(Stat::IsSorted, true.into()),
24-
(Stat::IsStrictSorted, true.into()),
2523
(Stat::RunCount, 0.into()),
2624
]));
2725
}
@@ -46,8 +44,9 @@ impl ArrayStatisticsCompute for NullableBools<'_> {
4644
stat,
4745
Stat::TrueCount | Stat::Min | Stat::Max | Stat::IsConstant
4846
) {
49-
return Ok(StatsSet::bools_with_true_count(
47+
return Ok(StatsSet::bools_with_true_and_null_count(
5048
self.0.bitand(self.1).count_set_bits(),
49+
self.1.count_set_bits(),
5150
self.0.len(),
5251
));
5352
}
@@ -86,8 +85,9 @@ impl ArrayStatisticsCompute for BooleanBuffer {
8685
stat,
8786
Stat::TrueCount | Stat::Min | Stat::Max | Stat::IsConstant
8887
) {
89-
return Ok(StatsSet::bools_with_true_count(
88+
return Ok(StatsSet::bools_with_true_and_null_count(
9089
self.count_set_bits(),
90+
0,
9191
self.len(),
9292
));
9393
}
@@ -165,8 +165,9 @@ impl BoolStatsAccumulator {
165165

166166
#[cfg(test)]
167167
mod test {
168-
use vortex_dtype::DType;
168+
use arrow_buffer::BooleanBuffer;
169169
use vortex_dtype::Nullability::Nullable;
170+
use vortex_dtype::{DType, Nullability};
170171
use vortex_scalar::Scalar;
171172

172173
use crate::array::BoolArray;
@@ -226,6 +227,46 @@ mod test {
226227
assert!(bool_arr.statistics().compute_max::<bool>().unwrap());
227228
assert_eq!(bool_arr.statistics().compute_run_count().unwrap(), 3);
228229
assert_eq!(bool_arr.statistics().compute_true_count().unwrap(), 2);
230+
assert_eq!(bool_arr.statistics().compute_null_count().unwrap(), 3);
231+
}
232+
233+
#[test]
234+
fn one_non_null_value() {
235+
let bool_arr = BoolArray::from_iter(vec![Some(false), None]);
236+
assert!(!bool_arr.statistics().compute_is_strict_sorted().unwrap());
237+
assert!(bool_arr.statistics().compute_is_sorted().unwrap());
238+
assert!(!bool_arr.statistics().compute_is_constant().unwrap());
239+
assert!(!bool_arr.statistics().compute_min::<bool>().unwrap());
240+
assert!(!bool_arr.statistics().compute_max::<bool>().unwrap());
241+
assert_eq!(bool_arr.statistics().compute_run_count().unwrap(), 1);
242+
assert_eq!(bool_arr.statistics().compute_true_count().unwrap(), 0);
243+
assert_eq!(bool_arr.statistics().compute_null_count().unwrap(), 1);
244+
}
245+
246+
#[test]
247+
fn empty_array() {
248+
let bool_arr = BoolArray::new(BooleanBuffer::new_set(0), Nullability::NonNullable);
249+
assert!(bool_arr.statistics().compute_is_strict_sorted().is_none());
250+
assert!(bool_arr.statistics().compute_is_sorted().is_none());
251+
assert!(bool_arr.statistics().compute_is_constant().is_none());
252+
assert!(bool_arr.statistics().compute_min::<bool>().is_none());
253+
assert!(bool_arr.statistics().compute_max::<bool>().is_none());
254+
assert_eq!(bool_arr.statistics().compute_run_count().unwrap(), 0);
255+
assert_eq!(bool_arr.statistics().compute_true_count().unwrap(), 0);
256+
assert_eq!(bool_arr.statistics().compute_null_count().unwrap(), 0);
257+
}
258+
259+
#[test]
260+
fn all_false() {
261+
let bool_arr = BoolArray::from_iter(vec![false, false, false]);
262+
assert!(!bool_arr.statistics().compute_is_strict_sorted().unwrap());
263+
assert!(bool_arr.statistics().compute_is_sorted().unwrap());
264+
assert!(bool_arr.statistics().compute_is_constant().unwrap());
265+
assert!(!bool_arr.statistics().compute_min::<bool>().unwrap());
266+
assert!(!bool_arr.statistics().compute_max::<bool>().unwrap());
267+
assert_eq!(bool_arr.statistics().compute_run_count().unwrap(), 1);
268+
assert_eq!(bool_arr.statistics().compute_true_count().unwrap(), 0);
269+
assert_eq!(bool_arr.statistics().compute_null_count().unwrap(), 0);
229270
}
230271

231272
#[test]
@@ -244,5 +285,6 @@ mod test {
244285
);
245286
assert_eq!(bool_arr.statistics().compute_run_count().unwrap(), 1);
246287
assert_eq!(bool_arr.statistics().compute_true_count().unwrap(), 0);
288+
assert_eq!(bool_arr.statistics().compute_null_count().unwrap(), 5);
247289
}
248290
}

vortex-array/src/stats/statsset.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,18 @@ impl StatsSet {
8080
stats
8181
}
8282

83-
pub fn bools_with_true_count(true_count: usize, len: usize) -> StatsSet {
83+
pub fn bools_with_true_and_null_count(
84+
true_count: usize,
85+
null_count: usize,
86+
len: usize,
87+
) -> StatsSet {
8488
StatsSet::from_iter([
8589
(Stat::TrueCount, true_count.into()),
8690
(Stat::Min, (true_count == len).into()),
8791
(Stat::Max, (true_count > 0).into()),
8892
(
8993
Stat::IsConstant,
90-
(true_count == 0 || true_count == len).into(),
94+
((true_count == 0 && null_count == 0) || true_count == len).into(),
9195
),
9296
])
9397
}

0 commit comments

Comments
 (0)