@@ -7,13 +7,14 @@ use std::fmt::{Debug, Display, Formatter};
77use std:: hash:: { Hash , Hasher } ;
88use std:: sync:: Arc ;
99
10- use vortex_dtype:: { DType , FieldPath } ;
10+ use vortex_dtype:: DType ;
1111use vortex_error:: { VortexExpect , VortexResult } ;
1212use vortex_vector:: Vector ;
1313
1414use crate :: ArrayRef ;
1515use crate :: expr:: display:: DisplayTreeExpr ;
1616use crate :: expr:: { ChildName , ExprId , ExprVTable , ExpressionView , StatsCatalog , VTable } ;
17+ use crate :: stats:: Stat ;
1718
1819/// A node in a Vortex expression tree.
1920///
@@ -164,39 +165,34 @@ impl Expression {
164165 ///
165166 /// Some expressions, in theory, have falsifications but this function does not support them
166167 /// such as `x < (y < z)` or `x LIKE "needle%"`.
167- pub fn stat_falsification ( & self , catalog : & mut dyn StatsCatalog ) -> Option < Expression > {
168+ pub fn stat_falsification ( & self , catalog : & dyn StatsCatalog ) -> Option < Expression > {
168169 self . vtable . as_dyn ( ) . stat_falsification ( self , catalog)
169170 }
170171
171- /// An expression for the upper non-null bound of this expression , if available.
172+ /// Returns an expression representing the zoned statistic for the given stat , if available.
172173 ///
173- /// This function returns None if there is no upper bound, or it is difficult to compute.
174+ /// The [`StatsCatalog`] returns expressions that can be evaluated using the zone map as a
175+ /// scope. Expressions can implement this function to propagate such statistics through the
176+ /// expression tree. For example, the `a + 10` expression could propagate `min: min(a) + 10`.
174177 ///
175- /// The returned expression evaluates to null if the maximum value is unknown. In that case, you
176- /// _must not_ assume the array is empty _nor_ may you assume the array only contains non-null
177- /// values.
178- pub fn stat_max ( & self , catalog : & mut dyn StatsCatalog ) -> Option < Expression > {
179- self . vtable . as_dyn ( ) . stat_max ( self , catalog)
178+ /// NOTE(gatesn): we currently cannot represent statistics over nested fields. Please file an
179+ /// issue to discuss a solution to this.
180+ pub fn stat_expression ( & self , stat : Stat , catalog : & dyn StatsCatalog ) -> Option < Expression > {
181+ self . vtable . as_dyn ( ) . stat_expression ( self , stat, catalog)
180182 }
181183
182- /// An expression for the lower non-null bound of this expression , if available.
184+ /// Returns an expression representing the zoned minimum statistic , if available.
183185 ///
184- /// See [`Expression::stat_max `] for important details.
185- pub fn stat_min ( & self , catalog : & mut dyn StatsCatalog ) -> Option < Expression > {
186- self . vtable . as_dyn ( ) . stat_min ( self , catalog)
186+ /// See [`Self::stat_expression `] for details.
187+ pub fn stat_min ( & self , catalog : & dyn StatsCatalog ) -> Option < Expression > {
188+ self . stat_expression ( Stat :: Min , catalog)
187189 }
188190
189- /// An expression for the NaN count for a column , if available.
191+ /// Returns an expression representing the zoned maximum statistic , if available.
190192 ///
191- /// This method returns `None` if the NaNCount stat is unknown.
192- pub fn stat_nan_count ( & self , catalog : & mut dyn StatsCatalog ) -> Option < Expression > {
193- self . vtable . as_dyn ( ) . stat_nan_count ( self , catalog)
194- }
195-
196- // TODO(ngates): I'm not sure what this is really for? We need to clean up stats compute for
197- // expressions.
198- pub fn stat_field_path ( & self ) -> Option < FieldPath > {
199- self . vtable . as_dyn ( ) . stat_field_path ( self )
193+ /// See [`Self::stat_expression`] for details.
194+ pub fn stat_max ( & self , catalog : & dyn StatsCatalog ) -> Option < Expression > {
195+ self . stat_expression ( Stat :: Max , catalog)
200196 }
201197
202198 /// Format the expression as a compact string.
0 commit comments