@@ -4,10 +4,10 @@ use std::sync::{Arc, RwLock};
44
55use async_trait:: async_trait;
66use bit_vec:: BitVec ;
7- use exponential_decay_histogram:: ExponentialDecayHistogram ;
87use itertools:: Itertools ;
8+ use sketches_ddsketch:: DDSketch ;
99use vortex_array:: aliases:: hash_map:: HashMap ;
10- use vortex_error:: { VortexExpect , VortexResult , vortex_panic} ;
10+ use vortex_error:: { VortexExpect , VortexResult , vortex_err , vortex_panic} ;
1111use vortex_expr:: ExprRef ;
1212use vortex_expr:: forms:: cnf:: cnf;
1313use vortex_mask:: Mask ;
@@ -18,8 +18,6 @@ use crate::{
1818
1919/// The selectivity histogram quantile to use for reordering conjuncts. Where 0 == no rows match.
2020const DEFAULT_SELECTIVITY_QUANTILE : f64 = 0.1 ;
21- /// The multiplier to used to convert selectivity to i64 for the histogram.
22- const SELECTIVITY_MULTIPLIER : f64 = 1_000_000.0 ;
2321
2422/// A [`LayoutReader`] that splits boolean expressions into individual conjunctions, tracks
2523/// statistics about selectivity, and uses this information to reorder the evaluation of the
@@ -116,7 +114,7 @@ pub struct FilterExpr {
116114 /// The conjuncts involved in the filter expression.
117115 conjuncts : Vec < ExprRef > ,
118116 /// A histogram of the selectivity of each conjunct.
119- conjunct_selectivity : Vec < RwLock < ExponentialDecayHistogram > > ,
117+ conjunct_selectivity : Vec < RwLock < DDSketch > > ,
120118 /// The preferred ordering of conjuncts.
121119 ordering : RwLock < Vec < usize > > ,
122120 /// The quantile to use from the selectivity histogram of each conjunct.
@@ -129,11 +127,9 @@ impl FilterExpr {
129127 let num_conjuncts = conjuncts. len ( ) ;
130128 Self {
131129 conjuncts,
132- conjunct_selectivity : iter:: repeat_with ( || {
133- RwLock :: new ( ExponentialDecayHistogram :: new ( ) )
134- } )
135- . take ( num_conjuncts)
136- . collect ( ) ,
130+ conjunct_selectivity : iter:: repeat_with ( || RwLock :: new ( DDSketch :: default ( ) ) )
131+ . take ( num_conjuncts)
132+ . collect ( ) ,
137133 // The initial ordering is naive, we could order this by how well we expect each
138134 // comparison operator to perform. e.g. == might be more selective than <=? Not obvious.
139135 ordering : RwLock :: new ( ( 0 ..num_conjuncts) . collect ( ) ) ,
@@ -160,9 +156,7 @@ impl FilterExpr {
160156 . write ( )
161157 . vortex_expect ( "poisoned lock" ) ;
162158
163- // Since our histogram only supports i64, we map our f64 into a 0-1m range.
164- let selectivity = ( selectivity * SELECTIVITY_MULTIPLIER ) . round ( ) as i64 ;
165- histogram. update ( selectivity) ;
159+ histogram. add ( selectivity) ;
166160 }
167161
168162 let all_selectivity = self
@@ -172,8 +166,11 @@ impl FilterExpr {
172166 histogram
173167 . read ( )
174168 . vortex_expect ( "poisoned lock" )
175- . snapshot ( )
176- . value ( self . selectivity_quantile )
169+ . quantile ( self . selectivity_quantile )
170+ . map_err ( |e| vortex_err ! ( "{e}" ) ) // Only errors when the quantile is out of range
171+ . vortex_expect ( "quantile out of range" )
172+ // If the sketch is empty, its selectivity is 0.
173+ . unwrap_or_default ( )
177174 } )
178175 . collect :: < Vec < _ > > ( ) ;
179176
@@ -186,17 +183,17 @@ impl FilterExpr {
186183
187184 // Re-sort our conjuncts based on the new statistics.
188185 let mut ordering = self . ordering . write ( ) . vortex_expect ( "lock poisoned" ) ;
189- ordering. sort_unstable_by_key ( |& idx| all_selectivity[ idx] ) ;
186+ ordering. sort_unstable_by ( |& l_idx, & r_idx| {
187+ all_selectivity[ l_idx]
188+ . partial_cmp ( & all_selectivity[ r_idx] )
189+ . vortex_expect ( "Can't compare selectivity values" )
190+ } ) ;
190191
191192 log:: debug!(
192193 "Reordered conjuncts based on new selectivity {:?}" ,
193194 ordering
194195 . iter( )
195- . map( |& idx| format!(
196- "({}) => {}" ,
197- self . conjuncts[ idx] ,
198- all_selectivity[ idx] as f64 / SELECTIVITY_MULTIPLIER
199- ) )
196+ . map( |& idx| format!( "({}) => {}" , self . conjuncts[ idx] , all_selectivity[ idx] ) )
200197 . join( ", " )
201198 ) ;
202199 }
0 commit comments