@@ -1758,7 +1758,8 @@ fn tuple_err<T, R>(value: (Result<T>, Result<R>)) -> Result<(T, R)> {
17581758#[ cfg( test) ]
17591759mod tests {
17601760 use super :: * ;
1761- use crate :: logical_plan:: { DFField , DFSchema , DFSchemaRef } ;
1761+ use crate :: logical_plan:: { and, DFField , DFSchema , DFSchemaRef } ;
1762+ use crate :: physical_plan:: OptimizerHints ;
17621763 use crate :: physical_plan:: { csv:: CsvReadOptions , expressions, Partitioning } ;
17631764 use crate :: scalar:: ScalarValue ;
17641765 use crate :: {
@@ -2041,6 +2042,50 @@ mod tests {
20412042 Ok ( ( ) )
20422043 }
20432044
/// Checks that `input_sorted_by_group_key` reports the input as sorted by the group key
/// when the input's sort key also contains a single-value column that is *not* part of
/// the group key (and is separated from the group key columns by other sort columns).
#[test]
fn hash_agg_aggregation_strategy_with_nongrouped_single_value_columns_in_sort_key() -> Result<()> {
    let csv_path = format!(
        "{}/csv/aggregate_test_100.csv",
        crate::test_util::arrow_test_data()
    );
    let read_options = CsvReadOptions::new().schema_infer_max_records(100);

    // Every sort column uses the same `sort(true, true)` flags.
    let sort_key: Vec<Expr> = ["c1", "c2", "c3", "c4", "c5", "c6", "c7", "c8"]
        .iter()
        .map(|&column_name| col(column_name).sort(true, true))
        .collect();

    // Equality filters on "c4" and "c8"; the hint assertions below expect exactly these
    // two columns to be reported as single-value columns.
    let single_value_filter = and(
        col("c4").eq(lit("value_a")),
        col("c8").eq(lit("value_b")),
    );

    // Rather than mocking an ExecutionPlan, build a real input plan whose output_hints()
    // has the shape this test needs.
    let logical_plan = LogicalPlanBuilder::scan_csv(csv_path, read_options, None)?
        .filter(single_value_filter)?
        .sort(sort_key)?
        .build()?;
    let execution_plan = plan(&logical_plan)?;

    // Both single-value columns belong to the sort key, but only one of them will end up
    // in the group key.
    let hints: OptimizerHints = execution_plan.output_hints();
    assert_eq!(hints.sort_order, Some(vec![0, 1, 2, 3, 4, 5, 6, 7]));
    assert_eq!(hints.single_value_columns, vec![3, 7]);

    let mut ctx_state = make_ctx_state();
    ctx_state.config.concurrency = 4;
    let planner = DefaultPhysicalPlanner::default();

    // The group key covers single-value column 3 ("c4") but not single-value column 7
    // ("c8"); columns 5 and 6 ("c6" and "c7") sit between the group key and column 7.
    let group_key = vec![col("c1"), col("c2"), col("c3"), col("c4"), col("c5")];
    let physical_group_key = group_key
        .iter()
        .map(|expr| {
            planner
                .create_physical_expr(
                    expr,
                    &logical_plan.schema(),
                    &execution_plan.schema(),
                    &ctx_state,
                )
                .map(|phys_expr| (phys_expr, String::new()))
        })
        .collect::<Result<Vec<_>>>()?;

    let mut sort_order: Vec<usize> = Vec::new();
    let is_sorted: bool = input_sorted_by_group_key(
        execution_plan.as_ref(),
        &physical_group_key,
        &mut sort_order,
    );
    assert!(is_sorted);
    assert_eq!(sort_order, vec![0, 1, 2, 3, 4]);

    Ok(())
}

2088+
20442089 #[ test]
20452090 fn test_explain ( ) {
20462091 let schema = Schema :: new ( vec ! [ Field :: new( "id" , DataType :: Int32 , false ) ] ) ;
0 commit comments