Skip to content

Commit 3bb2a7b

Browse files
authored
Use array len as denominator for selectivity (#1468)
The true range is too expensive a metric to calculate given the current APIs.
1 parent e8f65e0 commit 3bb2a7b

File tree

1 file changed

+2
-29
lines changed

1 file changed

+2
-29
lines changed

vortex-array/src/compute/filter.rs

Lines changed: 2 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ pub fn filter(array: &ArrayData, mask: FilterMask) -> VortexResult<ArrayData> {
9191
pub struct FilterMask {
9292
array: ArrayData,
9393
true_count: usize,
94-
true_range: (usize, usize),
9594
range_selectivity: f64,
9695
indices: OnceLock<Vec<usize>>,
9796
slices: OnceLock<Vec<(usize, usize)>>,
@@ -118,7 +117,6 @@ impl Clone for FilterMask {
118117
Self {
119118
array: self.array.clone(),
120119
true_count: self.true_count,
121-
true_range: self.true_range,
122120
range_selectivity: self.range_selectivity,
123121
indices: self.indices.clone(),
124122
slices: self.slices.clone(),
@@ -204,10 +202,6 @@ impl FilterMask {
204202
self.array.len() - self.true_count
205203
}
206204

207-
pub fn true_range(&self) -> (usize, usize) {
208-
self.true_range
209-
}
210-
211205
/// Return the selectivity of the full mask.
212206
pub fn selectivity(&self) -> f64 {
213207
self.true_count as f64 / self.len() as f64
@@ -221,11 +215,10 @@ impl FilterMask {
221215
/// Get the canonical representation of the mask.
222216
pub fn to_boolean_buffer(&self) -> VortexResult<BooleanBuffer> {
223217
log::debug!(
224-
"FilterMask: len {} selectivity: {} true_count: {} true_range: {:?}",
218+
"FilterMask: len {} selectivity: {} true_count: {}",
225219
self.len(),
226220
self.range_selectivity(),
227221
self.true_count,
228-
self.true_range(),
229222
);
230223
self.boolean_buffer().cloned()
231224
}
@@ -294,31 +287,11 @@ impl TryFrom<ArrayData> for FilterMask {
294287
.compute_true_count()
295288
.ok_or_else(|| vortex_err!("Failed to compute true count for boolean array"))?;
296289

297-
// We try to compute a tighter range over the true values of the mask. This provides a
298-
// better measure of selectivity when deciding between iter_indices and iter_slices.
299-
let true_range = if let Ok(bool) = BoolArray::try_from(array.clone()) {
300-
let start = bool
301-
.boolean_buffer()
302-
.set_indices()
303-
.next()
304-
.unwrap_or_default();
305-
// TODO(ngates): we can find this faster by creating a reverse iterator.
306-
let end = bool
307-
.boolean_buffer()
308-
.set_indices()
309-
.last()
310-
.unwrap_or_else(|| array.len());
311-
(start, end)
312-
} else {
313-
(0, array.len())
314-
};
315-
316-
let selectivity = true_count as f64 / (true_range.1 - true_range.0) as f64;
290+
let selectivity = true_count as f64 / array.len() as f64;
317291

318292
Ok(Self {
319293
array,
320294
true_count,
321-
true_range,
322295
range_selectivity: selectivity,
323296
indices: OnceLock::new(),
324297
slices: OnceLock::new(),

0 commit comments

Comments
 (0)