Skip to content

Commit f828fa0

Browse files
committed
Clarify docs in session accessors
Signed-off-by: Nicholas Gates <[email protected]>
1 parent ef74f3b commit f828fa0

File tree

29 files changed

+225
-227
lines changed

29 files changed

+225
-227
lines changed

vortex-array/src/arrays/filter/kernel.rs

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,6 @@ impl Kernel for FilterKernel {
3030
Ok(Filter::filter(&self.child.execute()?, &self.mask))
3131
}
3232

33-
fn cost_estimate(&self, selection: &Mask) -> f64 {
34-
cost_for_dtype(&self.dtype, selection)
35-
}
36-
3733
fn push_down_filter(self: Box<Self>, selection: &Mask) -> VortexResult<PushDownResult> {
3834
let new_mask = self.mask.intersect_by_rank(selection);
3935
Ok(match self.child.push_down_filter(&new_mask)? {
@@ -46,23 +42,3 @@ impl Kernel for FilterKernel {
4642
})
4743
}
4844
}
49-
50-
fn cost_for_dtype(dtype: &DType, selection: &Mask) -> f64 {
51-
if selection.true_count() == 0 {
52-
return 0.0;
53-
}
54-
55-
match dtype {
56-
DType::Null => 0.0,
57-
DType::Primitive(ptype, _) => {
58-
// Some estimate based on PType width and true-count or number of slices??
59-
// Maybe nullable types are more expensive?
60-
// ... No idea how to tune this yet.
61-
let width = ptype.byte_width() as f64;
62-
let selectivity = selection.true_count() as f64 / selection.len() as f64;
63-
width * selectivity
64-
}
65-
DType::Extension(ext) => cost_for_dtype(ext.storage_dtype(), selection),
66-
_ => f64::INFINITY,
67-
}
68-
}

vortex-array/src/arrays/filter/vtable.rs

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,24 @@ use std::ops::Range;
77
use vortex_buffer::BufferHandle;
88
use vortex_compute::filter::Filter;
99
use vortex_dtype::DType;
10-
use vortex_error::vortex_bail;
1110
use vortex_error::VortexExpect;
1211
use vortex_error::VortexResult;
12+
use vortex_error::vortex_bail;
1313
use vortex_mask::Mask;
1414
use vortex_scalar::Scalar;
1515

16+
use crate::Array;
17+
use crate::ArrayBufferVisitor;
18+
use crate::ArrayChildVisitor;
19+
use crate::ArrayEq;
20+
use crate::ArrayHash;
21+
use crate::ArrayRef;
22+
use crate::Canonical;
23+
use crate::IntoArray;
24+
use crate::Precision;
25+
use crate::arrays::LEGACY_SESSION;
1626
use crate::arrays::filter::array::FilterArray;
1727
use crate::arrays::filter::kernel::FilterKernel;
18-
use crate::arrays::LEGACY_SESSION;
1928
use crate::kernel::BindCtx;
2029
use crate::kernel::KernelRef;
2130
use crate::kernel::PushDownResult;
@@ -33,15 +42,6 @@ use crate::vtable::OperationsVTable;
3342
use crate::vtable::VTable;
3443
use crate::vtable::ValidityVTable;
3544
use crate::vtable::VisitorVTable;
36-
use crate::Array;
37-
use crate::ArrayBufferVisitor;
38-
use crate::ArrayChildVisitor;
39-
use crate::ArrayEq;
40-
use crate::ArrayHash;
41-
use crate::ArrayRef;
42-
use crate::Canonical;
43-
use crate::IntoArray;
44-
use crate::Precision;
4545

4646
vtable!(Filter);
4747

@@ -100,17 +100,9 @@ impl VTable for FilterVTable {
100100
let mut child = array.child.bind_kernel(ctx)?;
101101
let mask = array.mask.clone();
102102

103-
let full_cost = child.cost_estimate(&Mask::new_true(array.child.len()));
104-
let pushdown_cost = child.cost_estimate(&mask);
105-
log::debug!(
106-
"Filter kernel cost estimate: full={}, pushdown={}",
107-
full_cost,
108-
pushdown_cost
109-
);
110-
111103
// NOTE(ngates): for now we keep the same behavior as develop where we push-down any
112104
// query with <20% true values.
113-
let pushdown = (pushdown_cost < full_cost) || (array.mask.density() < 0.2);
105+
let pushdown = array.mask.density() < 0.2;
114106

115107
if pushdown {
116108
// Try to push down the filter to the child if it's cheaper.

vortex-array/src/arrays/scalar_fn/kernel.rs

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -61,20 +61,6 @@ impl Kernel for ScalarFnKernel {
6161
Ok(self.scalar_fn.execute(args)?.ensure_vector(self.row_count))
6262
}
6363

64-
fn cost_estimate(&self, selection: &Mask) -> f64 {
65-
let self_cost = self.scalar_fn.cost_estimate(selection);
66-
let child_cost = self
67-
.inputs
68-
.iter()
69-
.map(|input| match input {
70-
KernelInput::Scalar(_) => 0.0,
71-
KernelInput::Vector(k) => k.cost_estimate(selection),
72-
})
73-
.sum::<f64>();
74-
75-
self_cost + child_cost
76-
}
77-
7864
fn push_down_filter(self: Box<Self>, selection: &Mask) -> VortexResult<PushDownResult> {
7965
let mut new_inputs = Vec::with_capacity(self.inputs.len());
8066
for (input, dtype) in self.inputs.into_iter().zip(&self.input_dtypes) {

vortex-array/src/arrays/scalar_fn/rules.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,19 @@
33

44
use vortex_error::VortexResult;
55
use vortex_scalar::Scalar;
6-
use vortex_vector::scalar_matches_dtype;
76
use vortex_vector::Datum;
87
use vortex_vector::VectorOps;
8+
use vortex_vector::scalar_matches_dtype;
99

10+
use crate::Array;
11+
use crate::ArrayRef;
12+
use crate::IntoArray;
1013
use crate::arrays::AnyScalarFn;
1114
use crate::arrays::ConstantArray;
1215
use crate::arrays::ConstantVTable;
1316
use crate::arrays::ScalarFnArray;
1417
use crate::expr::ExecutionArgs;
1518
use crate::optimizer::rules::ArrayReduceRule;
16-
use crate::Array;
17-
use crate::ArrayRef;
18-
use crate::IntoArray;
1919

2020
#[derive(Debug)]
2121
pub(crate) struct ScalarFnConstantRule;

vortex-array/src/arrays/scalar_fn/vtable/canonical.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@
33

44
use vortex_error::VortexExpect;
55

6+
use crate::Array;
7+
use crate::Canonical;
8+
use crate::arrays::LEGACY_SESSION;
69
use crate::arrays::scalar_fn::array::ScalarFnArray;
710
use crate::arrays::scalar_fn::vtable::ScalarFnVTable;
8-
use crate::arrays::LEGACY_SESSION;
911
use crate::executor::VectorExecutor;
1012
use crate::expr::ExecutionArgs;
1113
use crate::vectors::VectorIntoArray;
1214
use crate::vtable::CanonicalVTable;
13-
use crate::Array;
14-
use crate::Canonical;
1515

1616
impl CanonicalVTable<ScalarFnVTable> for ScalarFnVTable {
1717
fn canonicalize(array: &ScalarFnArray) -> Canonical {

vortex-array/src/executor.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4-
use vortex_error::vortex_ensure;
54
use vortex_error::VortexResult;
5+
use vortex_error::vortex_ensure;
66
use vortex_session::VortexSession;
77
use vortex_vector::Datum;
88
use vortex_vector::Vector;
99
use vortex_vector::VectorOps;
1010

11+
use crate::Array;
12+
use crate::ArrayRef;
1113
use crate::arrays::ConstantVTable;
1214
use crate::kernel::BindCtx;
1315
use crate::session::ArraySessionExt;
14-
use crate::Array;
15-
use crate::ArrayRef;
1616

1717
/// Executor for exporting a Vortex [`Vector`] or [`Datum`] from an [`ArrayRef`].
1818
pub trait VectorExecutor {

vortex-array/src/expr/exprs/get_item.rs

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,17 @@ use vortex_dtype::DType;
99
use vortex_dtype::FieldName;
1010
use vortex_dtype::FieldPath;
1111
use vortex_dtype::Nullability;
12-
use vortex_error::vortex_err;
1312
use vortex_error::VortexExpect;
1413
use vortex_error::VortexResult;
15-
use vortex_mask::Mask;
14+
use vortex_error::vortex_err;
1615
use vortex_proto::expr as pb;
1716
use vortex_vector::Datum;
1817
use vortex_vector::ScalarOps;
1918
use vortex_vector::VectorOps;
2019

20+
use crate::ArrayRef;
21+
use crate::ToCanonical;
2122
use crate::compute::mask;
22-
use crate::expr::exprs::root::root;
23-
use crate::expr::stats::Stat;
2423
use crate::expr::Arity;
2524
use crate::expr::ChildName;
2625
use crate::expr::ExecutionArgs;
@@ -31,9 +30,10 @@ use crate::expr::SimplifyCtx;
3130
use crate::expr::StatsCatalog;
3231
use crate::expr::VTable;
3332
use crate::expr::VTableExt;
33+
use crate::expr::exprs::root::root;
34+
use crate::expr::lit;
35+
use crate::expr::stats::Stat;
3436
use crate::scalar_fns::ExprBuiltins;
35-
use crate::ArrayRef;
36-
use crate::ToCanonical;
3737

3838
pub struct GetItem;
3939

@@ -173,6 +173,44 @@ impl VTable for GetItem {
173173
Ok(None)
174174
}
175175

176+
fn simplify_untyped(
177+
&self,
178+
field_name: &FieldName,
179+
expr: &Expression,
180+
) -> VortexResult<Option<Expression>> {
181+
let child = expr.child(0);
182+
183+
// If the child is a Pack expression, we can directly return the corresponding child.
184+
if let Some(pack) = child.as_opt::<Pack>() {
185+
let idx = pack
186+
.names
187+
.iter()
188+
.position(|name| name == field_name)
189+
.ok_or_else(|| {
190+
vortex_err!(
191+
"Cannot find field {} in pack fields {:?}",
192+
field_name,
193+
pack.names
194+
)
195+
})?;
196+
197+
let mut field = child.child(idx).clone();
198+
199+
// It's useful to simplify this node without type info, but we need to make sure
200+
// the nullability is correct. We cannot cast since we don't have the dtype info here,
201+
// so instead we insert a Mask expression that we know converts a child's dtype to
202+
// nullable.
203+
if pack.nullability.is_nullable() {
204+
// Mask with an all-true array to ensure the field DType is nullable.
205+
field = field.mask(lit(true))?;
206+
}
207+
208+
return Ok(Some(field));
209+
}
210+
211+
Ok(None)
212+
}
213+
176214
fn stat_expression(
177215
&self,
178216
field_name: &FieldName,
@@ -200,11 +238,6 @@ impl VTable for GetItem {
200238
// If this type-checks its infallible.
201239
false
202240
}
203-
204-
fn cost_estimate(&self, _options: &Self::Options, _selection: &Mask) -> f64 {
205-
// This is largely a metadata-only operation.
206-
0.0
207-
}
208241
}
209242

210243
/// Creates an expression that accesses a field from the root array.
@@ -242,15 +275,15 @@ mod tests {
242275
use vortex_dtype::StructFields;
243276
use vortex_scalar::Scalar;
244277

278+
use crate::Array;
279+
use crate::IntoArray;
245280
use crate::arrays::StructArray;
246281
use crate::expr::exprs::binary::checked_add;
247282
use crate::expr::exprs::get_item::get_item;
248283
use crate::expr::exprs::literal::lit;
249284
use crate::expr::exprs::pack::pack;
250285
use crate::expr::exprs::root::root;
251286
use crate::validity::Validity;
252-
use crate::Array;
253-
use crate::IntoArray;
254287

255288
fn test_array() -> StructArray {
256289
StructArray::from_fields(&[

vortex-array/src/expr/exprs/mask.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use std::fmt::Formatter;
5+
use std::ops::Not;
56

67
use vortex_dtype::DType;
78
use vortex_dtype::Nullability;
@@ -15,6 +16,7 @@ use vortex_vector::ScalarOps;
1516
use vortex_vector::VectorMutOps;
1617
use vortex_vector::VectorOps;
1718

19+
use crate::Array;
1820
use crate::ArrayRef;
1921
use crate::expr::Arity;
2022
use crate::expr::ChildName;
@@ -75,10 +77,15 @@ impl VTable for Mask {
7577
fn evaluate(
7678
&self,
7779
_options: &Self::Options,
78-
_expr: &Expression,
79-
_scope: &ArrayRef,
80+
expr: &Expression,
81+
scope: &ArrayRef,
8082
) -> VortexResult<ArrayRef> {
81-
todo!()
83+
let child = expr.child(0).evaluate(scope)?;
84+
85+
// Invert the validity mask - we want to set values to null where validity is false.
86+
let inverted_mask = child.validity_mask().not();
87+
88+
crate::compute::mask(&child, &inverted_mask)
8289
}
8390

8491
fn execute(&self, _options: &Self::Options, args: ExecutionArgs) -> VortexResult<Datum> {

vortex-array/src/expr/exprs/merge.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ use vortex_dtype::StructFields;
1414
use vortex_error::VortexExpect;
1515
use vortex_error::VortexResult;
1616
use vortex_error::vortex_bail;
17-
use vortex_mask::Mask;
1817
use vortex_utils::aliases::hash_set::HashSet;
1918
use vortex_vector::Datum;
2019

@@ -186,11 +185,6 @@ impl VTable for Merge {
186185
todo!()
187186
}
188187

189-
fn cost_estimate(&self, _options: &Self::Options, _selection: &Mask) -> f64 {
190-
// This is largely a metadata-only operation.
191-
0.0
192-
}
193-
194188
fn simplify(
195189
&self,
196190
options: &Self::Options,

0 commit comments

Comments
 (0)