@@ -129,14 +129,56 @@ use smallvec::SmallVec;
pub struct OptimizerHints {
    /// If the output is sorted, contains indices of the sort key columns in the output schema.
    /// Each partition should meet this sort order, but order between partitions is unspecified.
    /// Note that this does **not** specify the exact ordering inside each of the columns, e.g.
    /// the values may end up in ascending or descending order, nulls can go first or last.
    pub sort_order: Option<Vec<usize>>,

    /// Describes the sawtoothing runs of a stream that is partially sorted. If `sort_order` is
    /// present, the first element of this should be `sort_order.unwrap()`. If we take a sorted
    /// stream and add a projection that removes a column in the middle of `sort_order`, and it
    /// isn't a single value column, `approximate_sort_order.len()` would be 2, and it would be
    /// the input's sort order split on the missing column.
    ///
    /// However, this is free to have jumps outside of the sort order. We might have a MergeNode
    /// which retains the `approximate_sort_order` optimizer hint despite merging stuff out of
    /// order. The approximate sort order is more "statistical" in nature.
    pub approximate_sort_order: Vec<Vec<usize>>,
    /// True if the sort order has no jumps other than those permitted by
    /// `approximate_sort_order`. This means that the ordering represents a truly sorted order
    /// with some columns missing.
    pub approximate_sort_order_is_strict: bool,
    /// True if there are no missing columns in front of the approximate sort order. If and only
    /// if this and `approximate_sort_order_is_strict` are true, that implies `sort_order` should
    /// equal `Some(approximate_sort_order[0])`.
    pub approximate_sort_order_is_prefix: bool,

    /// Indices of columns that will always have the same value in each row. No information about
    /// the value is provided.
    pub single_value_columns: Vec<usize>,
}
139158
159+ impl OptimizerHints {
160+ /// Use with None for sort_order is arguably deprecated. Used to adapt code that preceded
161+ /// approximate_sort_order information.
162+ fn new_sorted ( sort_order : Option < Vec < usize > > , single_value_columns : Vec < usize > ) -> OptimizerHints {
163+ let mut approximate_sort_order = Vec :: new ( ) ;
164+ let mut approximate_sort_order_is_strict = false ;
165+ let mut approximate_sort_order_is_prefix = false ;
166+ if let Some ( order) = & sort_order {
167+ approximate_sort_order. push ( order. clone ( ) ) ;
168+ approximate_sort_order_is_strict = true ;
169+ approximate_sort_order_is_prefix = true ;
170+ }
171+ let hints = OptimizerHints {
172+ sort_order,
173+ approximate_sort_order,
174+ approximate_sort_order_is_prefix,
175+ approximate_sort_order_is_strict,
176+ single_value_columns,
177+ } ;
178+ hints
179+ }
180+ }
181+
/// `ExecutionPlan` represents nodes in the DataFusion Physical Plan.
///
/// Each `ExecutionPlan` is Partition-aware and is responsible for
0 commit comments