Skip to content

Commit edc2bc2

Browse files
committed
always iterate the validity
1 parent 4ed455e commit edc2bc2

File tree

1 file changed

+46
-42
lines changed

1 file changed

+46
-42
lines changed

encodings/runend/src/array.rs

Lines changed: 46 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -230,34 +230,24 @@ impl StatisticsVTable<RunEndArray> for RunEndEncoding {
230230
fn compute_statistics(&self, array: &RunEndArray, stat: Stat) -> VortexResult<StatsSet> {
231231
let mut stats = StatsSet::default();
232232

233-
match stat {
234-
Stat::Min | Stat::Max => {
235-
if let Some(extrema) = array.values().statistics().compute(stat) {
236-
stats.set(stat, extrema);
237-
}
238-
}
239-
Stat::IsSorted => {
240-
let is_sorted = Scalar::from(
241-
array
242-
.values()
243-
.statistics()
244-
.compute_is_sorted()
245-
.unwrap_or(false)
246-
&& array.logical_validity().all_valid(),
247-
);
248-
stats.set(stat, is_sorted);
249-
}
233+
let value = match stat {
234+
Stat::Min | Stat::Max => array.values().statistics().compute(stat),
235+
Stat::IsSorted => Some(Scalar::from(
236+
array
237+
.values()
238+
.statistics()
239+
.compute_is_sorted()
240+
.unwrap_or(false)
241+
&& array.logical_validity().all_valid(),
242+
)),
250243
Stat::TrueCount => match array.dtype() {
251244
DType::Bool(_) => {
252245
let ends = array.ends().into_primitive()?;
253246
let bools = array.values().into_bool()?.boolean_buffer();
254-
let mut true_count: u64 = 0;
255-
let mut null_count: u64 = 0;
256247

257-
match array.values().logical_validity() {
248+
let true_count = match array.values().logical_validity() {
258249
LogicalValidity::AllValid(_) => {
259-
null_count = 0;
260-
true_count = match_each_unsigned_integer_ptype!(ends.ptype(), |$P| {
250+
match_each_unsigned_integer_ptype!(ends.ptype(), |$P| {
261251
let mut begin = array.offset() as $P;
262252
ends
263253
.as_slice::<$P>()
@@ -269,36 +259,46 @@ impl StatisticsVTable<RunEndArray> for RunEndEncoding {
269259
(len as u64) * (bools.value(index as usize) as u64)
270260
})
271261
.sum()
272-
});
273-
}
274-
LogicalValidity::AllInvalid(_) => {
275-
null_count = array.len() as u64;
276-
true_count = 0;
262+
})
277263
}
264+
LogicalValidity::AllInvalid(_) => 0,
278265
LogicalValidity::Array(is_valid) => {
279266
let is_valid = is_valid.into_bool()?.boolean_buffer();
280-
281-
match_each_unsigned_integer_ptype!(ends.ptype(), |$P| {
282-
let mut begin = array.offset() as $P;
283-
for (index, end) in ends.as_slice::<$P>().iter().enumerate() {
284-
let len = *end - begin;
285-
begin = *end;
286-
true_count += (len as u64) * (bools.value(index as usize) as u64) * (is_valid.value(index as usize) as u64);
287-
null_count += (len as u64) * (is_valid.value(index as usize) as u64);
267+
let mut is_valid = is_valid.set_indices();
268+
match is_valid.next() {
269+
None => array.len() as u64,
270+
Some(valid_index) => {
271+
let offsetted_len = (array.len() + array.offset()) as u64;
272+
let mut true_count: u64 = array.len() as u64;
273+
match_each_unsigned_integer_ptype!(ends.ptype(), |$P| {
274+
let ends = ends.as_slice::<$P>();
275+
let begin = if valid_index == 0 {
276+
0
277+
} else {
278+
ends[valid_index - 1]
279+
};
280+
281+
true_count += bools.value(valid_index as usize) as u64 * (cmp::min(ends[valid_index] as u64, offsetted_len) - begin as u64);
282+
283+
for valid_index in is_valid {
284+
true_count += bools.value(valid_index as usize) as u64 * (cmp::min(ends[valid_index] as u64, offsetted_len) - ends[valid_index - 1] as u64);
285+
}
286+
287+
true_count
288+
})
288289
}
289-
});
290+
}
290291
}
291292
};
292293

293-
stats.set(Stat::TrueCount, true_count);
294-
stats.set(Stat::NullCount, null_count);
294+
Some(Scalar::from(true_count))
295295
}
296-
DType::Primitive(..) => {}
296+
DType::Primitive(..) => None,
297297
dtype => vortex_bail!("invalid dtype: {}", dtype),
298298
},
299299
Stat::NullCount => {
300300
let ends = array.ends().into_primitive()?;
301-
let null_count: u64 = match array.values().logical_validity() {
301+
let null_count = match array.values().logical_validity() {
302302
LogicalValidity::AllValid(_) => 0_u64,
303303
LogicalValidity::AllInvalid(_) => array.len() as u64,
304304
LogicalValidity::Array(is_valid) => {
@@ -328,9 +328,13 @@ impl StatisticsVTable<RunEndArray> for RunEndEncoding {
328328
}
329329
}
330330
};
331-
stats.set(stat, null_count);
331+
Some(Scalar::from(null_count))
332332
}
333-
_ => {}
333+
_ => None,
334+
};
335+
336+
if let Some(value) = value {
337+
stats.set(stat, value)
334338
};
335339

336340
Ok(stats)

0 commit comments

Comments
 (0)