@@ -6,12 +6,14 @@ mod tests {
66 use crate :: assert_snapshot;
77 use crate :: common:: localhost:: { start_localhost_context, NoopSessionBuilder } ;
88 use crate :: common:: parquet:: register_parquet_tables;
9+ use crate :: common:: plan:: distribute_aggregate;
910 use datafusion:: arrow:: util:: pretty:: pretty_format_batches;
1011 use datafusion:: physical_plan:: { displayable, execute_stream} ;
1112 use futures:: TryStreamExt ;
1213 use std:: error:: Error ;
1314
1415 #[ tokio:: test]
16+ #[ ignore]
1517 async fn distributed_aggregation ( ) -> Result < ( ) , Box < dyn Error > > {
1618 // FIXME: the original ports were already in use on my machine; we should find unused ports instead.
1719 // The ports have been changed for now as a workaround.
@@ -26,9 +28,13 @@ mod tests {
2628
2729 let physical_str = displayable ( physical. as_ref ( ) ) . indent ( true ) . to_string ( ) ;
2830
29- println ! ( " \n \n Physical Plan: \n {}" , physical_str ) ;
31+ let physical_distributed = distribute_aggregate ( physical . clone ( ) ) ? ;
3032
31- /*assert_snapshot!(physical_str,
33+ let physical_distributed_str = displayable ( physical_distributed. as_ref ( ) )
34+ . indent ( true )
35+ . to_string ( ) ;
36+
37+ assert_snapshot ! ( physical_str,
3238 @r"
3339 ProjectionExec: expr=[count(*)@0 as count(*), RainToday@1 as RainToday]
3440 SortPreservingMergeExec: [count(Int64(1))@2 ASC NULLS LAST]
@@ -41,7 +47,24 @@ mod tests {
4147 AggregateExec: mode=Partial, gby=[RainToday@0 as RainToday], aggr=[count(Int64(1))]
4248 DataSourceExec: file_groups={1 group: [[/testdata/weather.parquet]]}, projection=[RainToday], file_type=parquet
4349 " ,
44- );*/
50+ ) ;
51+
52+ assert_snapshot ! ( physical_distributed_str,
53+ @r"
54+ ProjectionExec: expr=[count(*)@0 as count(*), RainToday@1 as RainToday]
55+ SortPreservingMergeExec: [count(Int64(1))@2 ASC NULLS LAST]
56+ SortExec: expr=[count(Int64(1))@2 ASC NULLS LAST], preserve_partitioning=[true]
57+ ProjectionExec: expr=[count(Int64(1))@1 as count(*), RainToday@0 as RainToday, count(Int64(1))@1 as count(Int64(1))]
58+ AggregateExec: mode=FinalPartitioned, gby=[RainToday@0 as RainToday], aggr=[count(Int64(1))]
59+ ArrowFlightReadExec: input_tasks=8 hash_expr=[] stage_id=UUID input_stage_id=UUID input_hosts=[http://localhost:50050/, http://localhost:50051/, http://localhost:50052/, http://localhost:50050/, http://localhost:50051/, http://localhost:50052/, http://localhost:50050/, http://localhost:50051/]
60+ CoalesceBatchesExec: target_batch_size=8192
61+ RepartitionExec: partitioning=Hash([RainToday@0], CPUs), input_partitions=CPUs
62+ RepartitionExec: partitioning=RoundRobinBatch(CPUs), input_partitions=1
63+ AggregateExec: mode=Partial, gby=[RainToday@0 as RainToday], aggr=[count(Int64(1))]
64+ ArrowFlightReadExec: input_tasks=1 hash_expr=[RainToday@0] stage_id=UUID input_stage_id=UUID input_hosts=[http://localhost:50052/]
65+ DataSourceExec: file_groups={1 group: [[/testdata/weather.parquet]]}, projection=[RainToday], file_type=parquet
66+ " ,
67+ ) ;
4568
4669 let batches = pretty_format_batches (
4770 & execute_stream ( physical, ctx. task_ctx ( ) ) ?
@@ -58,6 +81,20 @@ mod tests {
5881 +----------+-----------+
5982 " ) ;
6083
84+ let batches_distributed = pretty_format_batches (
85+ & execute_stream ( physical_distributed, ctx. task_ctx ( ) ) ?
86+ . try_collect :: < Vec < _ > > ( )
87+ . await ?,
88+ ) ?;
89+ assert_snapshot ! ( batches_distributed, @r"
90+ +----------+-----------+
91+ | count(*) | RainToday |
92+ +----------+-----------+
93+ | 66 | Yes |
94+ | 300 | No |
95+ +----------+-----------+
96+ " ) ;
97+
6198 Ok ( ( ) )
6299 }
63100}
0 commit comments