@@ -517,12 +517,7 @@ impl DefaultPhysicalPlanner {
517517 match input_sortedness. sawtooth_levels ( ) {
518518 Some ( 0 ) => {
519519 log:: error!( "DefaultPhysicalExpr: Perfect match for inplace aggregation" ) ;
520- let order = input_sortedness. sort_order [ 0 ]
521- . iter ( )
522- . map ( |( _sort_key_offset, group_key_offset) | {
523- * group_key_offset
524- } )
525- . collect_vec ( ) ;
520+ let order = input_sortedness. sort_order [ 0 ] . clone ( ) ; // TODO: No clone?
526521 ( AggregateStrategy :: InplaceSorted , AggregateStrategy :: InplaceSorted , Some ( order) )
527522 }
528523 Some ( n) => {
@@ -1695,13 +1690,12 @@ pub fn evaluate_const(expr: Arc<dyn PhysicalExpr>) -> Result<Arc<dyn PhysicalExp
16951690/// Return value of input_sortedness_by_group_key. If succeeded, every group key offset appears in
16961691/// sort_order or unsorted exactly once.
16971692pub struct SortednessByGroupKey {
1698- /// Elems are (offset into the sort key, offset into the group key), with sort key offsets
1699- /// strictly increasing. Each Vec<(usize, usize)> is a clump of adjacent columns, with
1693+ /// Elems are offsets into the group key. Each Vec<usize> is a clump of adjacent columns, with
17001694 /// adjacency considered after ignoring single value columns.
17011695 ///
1702- /// Each column clump sees the input ordering in sawtoothing runs of rows, sawtoothing with different
1703- /// granularity.
1704- pub sort_order : Vec < Vec < ( usize , usize ) > > ,
1696+ /// Each column clump sees the input ordering in sawtoothing runs of rows, sawtoothing with
1697+ /// different granularity.
1698+ pub sort_order : Vec < Vec < usize > > ,
17051699 /// Indexes into the group key.
17061700 pub unsorted : Vec < usize > ,
17071701 /// true if the first clump of sort_order is detached from the prefix of the sort key (ignoring
@@ -1741,10 +1735,7 @@ impl SortednessByGroupKey {
17411735 /// existing compute_aggregate_strategy function.
17421736 pub fn compute_aggregate_strategy ( & self ) -> ( AggregateStrategy , Option < Vec < usize > > ) {
17431737 if self . is_sorted_by_group_key ( ) {
1744- let order = self . sort_order [ 0 ]
1745- . iter ( )
1746- . map ( |& ( _sort_i, group_i) | group_i)
1747- . collect_vec ( ) ;
1738+ let order = self . sort_order [ 0 ] . clone ( ) ;
17481739 ( AggregateStrategy :: InplaceSorted , Some ( order) )
17491740 } else {
17501741 ( AggregateStrategy :: Hash , None )
@@ -1804,14 +1795,14 @@ pub fn input_sortedness_by_group_key(
18041795 } ;
18051796 }
18061797
1807- let mut clumps = Vec :: < Vec < ( usize , usize ) > > :: new ( ) ;
1798+ let mut clumps = Vec :: < Vec < usize > > :: new ( ) ;
18081799 // At this point we walk through the sort_key_hit vec.
1809- let mut clump = Vec :: < ( usize , usize ) > :: new ( ) ;
1800+ let mut clump = Vec :: < usize > :: new ( ) ;
18101801 // Are our clumps detached from the sort prefix?
18111802 let mut detached_from_prefix = false ;
18121803 for ( i, & hit) in sort_key_hit. iter ( ) . enumerate ( ) {
18131804 if hit {
1814- clump. push ( ( i , sort_to_group[ i] ) ) ;
1805+ clump. push ( sort_to_group[ i] ) ;
18151806 } else if hints. single_value_columns . contains ( & sort_key[ i] ) {
18161807 // Don't end the clump.
18171808 } else {
@@ -1835,6 +1826,85 @@ pub fn input_sortedness_by_group_key(
18351826 }
18361827}
18371828
1829+ pub fn input_sortedness_by_group_key_using_approximate (
1830+ input : & dyn ExecutionPlan ,
1831+ group_key : & [ ( Arc < dyn PhysicalExpr > , String ) ] ,
1832+ ) -> SortednessByGroupKey {
1833+ if group_key. is_empty ( ) {
1834+ // The caller has to deal with it (and in fact it wants to).
1835+ return SortednessByGroupKey :: failed ( ) ;
1836+ }
1837+
1838+ let hints = input. output_hints ( ) ;
1839+ let input_schema = input. schema ( ) ;
1840+ let mut input_to_group = vec ! [ None ; input_schema. fields( ) . len( ) ] ;
1841+
1842+ for ( group_i, ( g, _) ) in group_key. iter ( ) . enumerate ( ) {
1843+ let col = g. as_any ( ) . downcast_ref :: < Column > ( ) ;
1844+ if col. is_none ( ) {
1845+ return SortednessByGroupKey :: failed ( ) ;
1846+ }
1847+ let input_col = input_schema. index_of ( col. unwrap ( ) . name ( ) ) ;
1848+ if input_col. is_err ( ) {
1849+ return SortednessByGroupKey :: failed ( ) ;
1850+ }
1851+ let input_col = input_col. unwrap ( ) ;
1852+ // If we have two group by exprs for the same input column, we might not optimize well in that case.
1853+ input_to_group[ input_col] = Some ( group_i) ;
1854+ }
1855+
1856+ let mut group_key_used = vec ! [ false ; group_key. len( ) ] ;
1857+ let mut prefix_maintained = None :: < bool > ;
1858+ let mut approximate_sort_order = Vec :: new ( ) ;
1859+ for in_segment in hints. approximate_sort_order {
1860+ let mut out_segment = Vec :: new ( ) ;
1861+ for in_col in in_segment {
1862+ if let Some ( group_i) = input_to_group[ in_col] {
1863+ if prefix_maintained. is_none ( ) {
1864+ prefix_maintained = Some ( true ) ;
1865+ }
1866+ out_segment. push ( group_i) ;
1867+ group_key_used[ group_i] = true ;
1868+ } else if hints. single_value_columns . contains ( & in_col) {
1869+ continue ;
1870+ } else {
1871+ if !out_segment. is_empty ( ) {
1872+ approximate_sort_order. push ( out_segment) ;
1873+ out_segment = Vec :: new ( ) ;
1874+ }
1875+ if prefix_maintained. is_none ( ) {
1876+ prefix_maintained = Some ( false ) ;
1877+ }
1878+ }
1879+
1880+ break ;
1881+ }
1882+ if prefix_maintained. is_none ( ) {
1883+ prefix_maintained = Some ( false ) ;
1884+ }
1885+ if !out_segment. is_empty ( ) {
1886+ approximate_sort_order. push ( out_segment) ;
1887+ out_segment = Vec :: new ( ) ;
1888+ }
1889+ }
1890+
1891+ let approximate_sort_order_is_strict = hints. approximate_sort_order_is_strict ;
1892+ let approximate_sort_order_is_prefix = hints. approximate_sort_order_is_prefix && prefix_maintained == Some ( true ) ;
1893+ let mut unsorted = Vec :: < usize > :: new ( ) ;
1894+ for ( group_i, key_used) in group_key_used. into_iter ( ) . enumerate ( ) {
1895+ if !key_used {
1896+ unsorted. push ( group_i) ;
1897+ }
1898+ }
1899+
1900+ SortednessByGroupKey {
1901+ sort_order : approximate_sort_order,
1902+ unsorted,
1903+ detached_from_prefix : approximate_sort_order_is_prefix,
1904+ succeeded : true ,
1905+ }
1906+ }
1907+
18381908fn tuple_err < T , R > ( value : ( Result < T > , Result < R > ) ) -> Result < ( T , R ) > {
18391909 match value {
18401910 ( Ok ( e) , Ok ( e1) ) => Ok ( ( e, e1) ) ,
0 commit comments