@@ -1716,16 +1716,21 @@ fn input_sorted_by_group_key(
17161716 }
17171717 sort_to_group[ sort_key_pos] = group_i;
17181718 }
1719- for i in 0 ..sort_key. len ( ) {
1720- if hints. single_value_columns . contains ( & sort_key[ i] ) {
1721- sort_key_hit[ i] = true ;
1719+
1720+ // At this point all elements of the group key mapped into some column of the sort key. This
1721+ // checks the group key is mapped into a prefix of the sort key, except that it's okay if it
1722+ // skips over single value columns.
1723+ let mut pref_len: usize = 0 ;
1724+ for ( i, hit) in sort_key_hit. iter ( ) . enumerate ( ) {
1725+ if !hit && !hints. single_value_columns . contains ( & sort_key[ i] ) {
1726+ break ;
17221727 }
1728+ pref_len += 1 ;
17231729 }
17241730
1725- // At this point all elements of the group key mapped into some column of the sort key.
1726- // This checks the group key is mapped into a prefix of the sort key.
1727- let pref_len = sort_key_hit. iter ( ) . take_while ( |present| * * present) . count ( ) ;
17281731 if sort_key_hit[ pref_len..] . iter ( ) . any ( |present| * present) {
1732+ // The group key did not hit a contiguous prefix of the sort key (ignoring single value
1733+ // columns); return false.
17291734 return false ;
17301735 }
17311736
@@ -1753,7 +1758,8 @@ fn tuple_err<T, R>(value: (Result<T>, Result<R>)) -> Result<(T, R)> {
17531758#[ cfg( test) ]
17541759mod tests {
17551760 use super :: * ;
1756- use crate :: logical_plan:: { DFField , DFSchema , DFSchemaRef } ;
1761+ use crate :: logical_plan:: { and, DFField , DFSchema , DFSchemaRef } ;
1762+ use crate :: physical_plan:: OptimizerHints ;
17571763 use crate :: physical_plan:: { csv:: CsvReadOptions , expressions, Partitioning } ;
17581764 use crate :: scalar:: ScalarValue ;
17591765 use crate :: {
@@ -2036,6 +2042,72 @@ mod tests {
20362042 Ok ( ( ) )
20372043 }
20382044
/// Checks that `input_sorted_by_group_key` reports the input as sorted by the group key when
/// the sort key ends in a single-value column that is not part of the group key.
#[test]
fn hash_agg_aggregation_strategy_with_nongrouped_single_value_columns_in_sort_key(
) -> Result<()> {
    let csv_path = format!(
        "{}/csv/aggregate_test_100.csv",
        crate::test_util::arrow_test_data()
    );
    let read_options = CsvReadOptions::new().schema_infer_max_records(100);

    // Ascending, nulls-first sort on every column from c1 through c8.
    let sort_exprs: Vec<Expr> = ["c1", "c2", "c3", "c4", "c5", "c6", "c7", "c8"]
        .iter()
        .map(|&name| col(name).sort(true, true))
        .collect();

    // Equality filters on c4 and c8; the hint assertions below expect exactly these two
    // columns to be reported as single-value columns.
    let single_value_filter = and(
        col("c4").eq(lit("value_a")),
        col("c8").eq(lit("value_b")),
    );

    // Rather than mocking an ExecutionPlan, plan a real query whose output_hints() has the
    // shape this test needs.
    let logical_plan = LogicalPlanBuilder::scan_csv(csv_path, read_options, None)?
        .filter(single_value_filter)?
        .sort(sort_exprs)?
        .build()?;
    let execution_plan = plan(&logical_plan)?;

    // Both single-value columns appear in the sort key, but only column 3 ("c4") will be in
    // the group key.
    let hints: OptimizerHints = execution_plan.output_hints();
    assert_eq!(hints.sort_order, Some(vec![0, 1, 2, 3, 4, 5, 6, 7]));
    assert_eq!(hints.single_value_columns, vec![3, 7]);

    let mut ctx_state = make_ctx_state();
    ctx_state.config.concurrency = 4;
    let planner = DefaultPhysicalPlanner::default();

    // The group key covers c1..c5 only: it includes single-value column 3, while single-value
    // column 7 sits behind the ungrouped columns 5 and 6 ("c6" and "c7").
    let physical_group_key = ["c1", "c2", "c3", "c4", "c5"]
        .iter()
        .map(|&name| {
            planner
                .create_physical_expr(
                    &col(name),
                    &logical_plan.schema(),
                    &execution_plan.schema(),
                    &ctx_state,
                )
                .map(|phys_expr| (phys_expr, "".to_owned()))
        })
        .collect::<Result<Vec<_>>>()?;

    let mut sort_order = Vec::<usize>::new();
    assert!(input_sorted_by_group_key(
        execution_plan.as_ref(),
        &physical_group_key,
        &mut sort_order,
    ));
    assert_eq!(sort_order, vec![0, 1, 2, 3, 4]);

    Ok(())
}
2110+
20392111 #[ test]
20402112 fn test_explain ( ) {
20412113 let schema = Schema :: new ( vec ! [ Field :: new( "id" , DataType :: Int32 , false ) ] ) ;
0 commit comments