@@ -35,7 +35,8 @@ use arrow_schema::{SortOptions, TimeUnit};
3535use datafusion:: { assert_batches_eq, dataframe} ;
3636use datafusion_functions_aggregate:: count:: { count_all, count_all_window} ;
3737use datafusion_functions_aggregate:: expr_fn:: {
38- array_agg, avg, count, count_distinct, max, median, min, sum,
38+ array_agg, avg, avg_distinct, count, count_distinct, max, median, min, sum,
39+ sum_distinct,
3940} ;
4041use datafusion_functions_nested:: make_array:: make_array_udf;
4142use datafusion_functions_window:: expr_fn:: { first_value, row_number} ;
@@ -502,32 +503,35 @@ async fn drop_with_periods() -> Result<()> {
502503#[ tokio:: test]
503504async fn aggregate ( ) -> Result < ( ) > {
504505 // build plan using DataFrame API
505- let df = test_table ( ) . await ?;
506+ // union so some of the distincts have a clearly distinct result
507+ let df = test_table ( ) . await ?. union ( test_table ( ) . await ?) ?;
506508 let group_expr = vec ! [ col( "c1" ) ] ;
507509 let aggr_expr = vec ! [
508- min( col( "c12" ) ) ,
509- max( col( "c12" ) ) ,
510- avg( col( "c12" ) ) ,
511- sum( col( "c12" ) ) ,
512- count( col( "c12" ) ) ,
513- count_distinct( col( "c12" ) ) ,
510+ min( col( "c4" ) ) . alias( "min(c4)" ) ,
511+ max( col( "c4" ) ) . alias( "max(c4)" ) ,
512+ avg( col( "c4" ) ) . alias( "avg(c4)" ) ,
513+ avg_distinct( col( "c4" ) ) . alias( "avg_distinct(c4)" ) ,
514+ sum( col( "c4" ) ) . alias( "sum(c4)" ) ,
515+ sum_distinct( col( "c4" ) ) . alias( "sum_distinct(c4)" ) ,
516+ count( col( "c4" ) ) . alias( "count(c4)" ) ,
517+ count_distinct( col( "c4" ) ) . alias( "count_distinct(c4)" ) ,
514518 ] ;
515519
516520 let df: Vec < RecordBatch > = df. aggregate ( group_expr, aggr_expr) ?. collect ( ) . await ?;
517521
518522 assert_snapshot ! (
519523 batches_to_sort_string( & df) ,
520- @r### "
521- +----+----------------------------- +----------------------------- +----------------------------- +----------------------------- +------------------------------- +-------------------- --------------------+
522- | c1 | min(aggregate_test_100.c12 ) | max(aggregate_test_100.c12 ) | avg(aggregate_test_100.c12) | sum(aggregate_test_100.c12 ) | count(aggregate_test_100.c12 ) | count(DISTINCT aggregate_test_100.c12 ) |
523- +----+----------------------------- +----------------------------- +----------------------------- +----------------------------- +------------------------------- +-------------------- --------------------+
524- | a | 0.02182578039211991 | 0.9800193410444061 | 0.48754517466109415 | 10.238448667882977 | 21 | 21 |
525- | b | 0.04893135681998029 | 0.9185813970744787 | 0.41040709263815384 | 7.797734760124923 | 19 | 19 |
526- | c | 0.0494924465469434 | 0.991517828651004 | 0.6600456536439784 | 13.860958726523545 | 21 | 21 |
527- | d | 0.061029375346466685 | 0.9748360509016578 | 0.48855379387549824 | 8.793968289758968 | 18 | 18 |
528- | e | 0.01479305307777301 | 0.9965400387585364 | 0.48600669271341534 | 10.206140546981722 | 21 | 21 |
529- +----+----------------------------- +----------------------------- +----------------------------- +----------------------------- +------------------------------- +-------------------- --------------------+
530- "###
524+ @r"
525+ +----+---------+ ---------+---------------------+---------------------+---------+------------------+-----------+ --------------------+
526+ | c1 | min(c4 ) | max(c4 ) | avg(c4) | avg_distinct(c4) | sum(c4 ) | sum_distinct(c4 ) | count(c4) | count_distinct(c4 ) |
527+ +----+---------+ ---------+---------------------+---------------------+---------+------------------+-----------+ --------------------+
528+ | a | -28462 | 32064 | 306.04761904761904 | 306.04761904761904 | 12854 | 6427 | 42 | 21 |
529+ | b | -28070 | 25286 | 7732.315789473684 | 7732.315789473684 | 293828 | 146914 | 38 | 19 |
530+ | c | -30508 | 29106 | -1320.5238095238096 | -1320.5238095238096 | -55462 | -27731 | 42 | 21 |
531+ | d | -24558 | 31106 | 10890.111111111111 | 10890.111111111111 | 392044 | 196022 | 36 | 18 |
532+ | e | -31500 | 32514 | -4268.333333333333 | -4268.333333333333 | -179270 | -89635 | 42 | 21 |
533+ +----+---------+ ---------+---------------------+---------------------+---------+------------------+-----------+ --------------------+
534+ "
531535 ) ;
532536
533537 Ok ( ( ) )
@@ -542,7 +546,9 @@ async fn aggregate_assert_no_empty_batches() -> Result<()> {
542546 min( col( "c12" ) ) ,
543547 max( col( "c12" ) ) ,
544548 avg( col( "c12" ) ) ,
549+ avg_distinct( col( "c12" ) ) ,
545550 sum( col( "c12" ) ) ,
551+ sum_distinct( col( "c12" ) ) ,
546552 count( col( "c12" ) ) ,
547553 count_distinct( col( "c12" ) ) ,
548554 median( col( "c12" ) ) ,
0 commit comments