11use std:: sync:: Arc ;
22
33use anyhow:: anyhow;
4- use arrow:: datatypes:: SchemaRef ;
4+ use arrow:: { compute :: concat_batches , datatypes:: SchemaRef } ;
55use datafusion:: {
6- logical_expr:: LogicalPlan , physical_plan:: ExecutionPlan , prelude:: SessionContext ,
6+ logical_expr:: LogicalPlan ,
7+ physical_plan:: { coalesce_partitions:: CoalescePartitionsExec , ExecutionPlan } ,
8+ prelude:: SessionContext ,
79} ;
810
911use datafusion_substrait:: { logical_plan:: consumer:: from_substrait_plan, substrait:: proto:: Plan } ;
12+ use tokio_stream:: StreamExt ;
1013
1114use crate :: {
1215 explain:: build_explain_batch,
@@ -77,6 +80,7 @@ impl QueryPlanner {
7780 match logical_plan {
7881 p @ LogicalPlan :: Explain ( _) => self . prepare_explain ( p, ctx) . await ,
7982 // add other logical plans for local execution here following the pattern for explain
83+ p @ LogicalPlan :: DescribeTable ( _) => self . prepare_local ( p, ctx) . await ,
8084 p => self . prepare_query ( p, ctx) . await ,
8185 }
8286 }
@@ -89,6 +93,7 @@ impl QueryPlanner {
8993 match logical_plan {
9094 p @ LogicalPlan :: Explain ( _) => self . prepare_explain ( p, ctx) . await ,
9195 // add other logical plans for local execution here following the pattern for explain
96+ p @ LogicalPlan :: DescribeTable ( _) => self . prepare_local ( p, ctx) . await ,
9297 p => self . prepare_query ( p, ctx) . await ,
9398 }
9499 }
@@ -103,44 +108,30 @@ impl QueryPlanner {
103108 self . send_it ( logical_plan, physical_plan, ctx) . await
104109 }
105110
106- async fn send_it (
111+ async fn prepare_local (
107112 & self ,
108113 logical_plan : LogicalPlan ,
109- physical_plan : Arc < dyn ExecutionPlan > ,
110114 ctx : SessionContext ,
111115 ) -> Result < QueryPlan > {
112- let query_id = uuid:: Uuid :: new_v4 ( ) . to_string ( ) ;
113-
114- // divide the physical plan into chunks (tasks) that we can distribute to workers
115- let ( distributed_plan, distributed_stages) =
116- execution_planning ( physical_plan. clone ( ) , 8192 , Some ( 2 ) ) . await ?;
116+ let physical_plan = physical_planning ( & logical_plan, & ctx) . await ?;
117117
118- let worker_addrs = get_worker_addresses ( ) ?;
118+ // execute it locally
119+ let mut stream =
120+ Arc :: new ( CoalescePartitionsExec :: new ( physical_plan) ) . execute ( 0 , ctx. task_ctx ( ) ) ?;
121+ let mut batches = vec ! [ ] ;
119122
120- // gather some information we need to send back such that
121- // we can send a ticket to the client
122- let final_stage = & distributed_stages[ distributed_stages. len ( ) - 1 ] ;
123- let schema = Arc :: clone ( & final_stage. plan . schema ( ) ) ;
124- let final_stage_id = final_stage. stage_id ;
123+ while let Some ( batch) = stream. next ( ) . await {
124+ batches. push ( batch?) ;
125+ }
125126
126- // distribute the stages to workers, further dividing them up
127- // into chunks of partitions (partition_groups)
128- let ( final_workers, tasks) =
129- distribute_stages ( & query_id, distributed_stages, worker_addrs) . await ?;
127+ if batches. is_empty ( ) {
128+ return Err ( anyhow ! ( "No data returned from local execution" ) . into ( ) ) ;
129+ }
130130
131- let qp = QueryPlan {
132- query_id,
133- session_context : ctx,
134- worker_addresses : final_workers,
135- final_stage_id,
136- schema,
137- logical_plan,
138- physical_plan,
139- distributed_plan,
140- distributed_tasks : tasks,
141- } ;
131+ let combined_batch = concat_batches ( & batches[ 0 ] . schema ( ) , & batches) ?;
132+ let physical_plan = Arc :: new ( RecordBatchExec :: new ( combined_batch) ) ;
142133
143- Ok ( qp )
134+ self . send_it ( logical_plan , physical_plan , ctx ) . await
144135 }
145136
146137 async fn prepare_explain (
@@ -172,4 +163,43 @@ impl QueryPlanner {
172163 )
173164 . await
174165 }
166+ async fn send_it (
167+ & self ,
168+ logical_plan : LogicalPlan ,
169+ physical_plan : Arc < dyn ExecutionPlan > ,
170+ ctx : SessionContext ,
171+ ) -> Result < QueryPlan > {
172+ let query_id = uuid:: Uuid :: new_v4 ( ) . to_string ( ) ;
173+
174+ // divide the physical plan into chunks (tasks) that we can distribute to workers
175+ let ( distributed_plan, distributed_stages) =
176+ execution_planning ( physical_plan. clone ( ) , 8192 , Some ( 2 ) ) . await ?;
177+
178+ let worker_addrs = get_worker_addresses ( ) ?;
179+
180+ // gather some information we need to send back such that
181+ // we can send a ticket to the client
182+ let final_stage = & distributed_stages[ distributed_stages. len ( ) - 1 ] ;
183+ let schema = Arc :: clone ( & final_stage. plan . schema ( ) ) ;
184+ let final_stage_id = final_stage. stage_id ;
185+
186+ // distribute the stages to workers, further dividing them up
187+ // into chunks of partitions (partition_groups)
188+ let ( final_workers, tasks) =
189+ distribute_stages ( & query_id, distributed_stages, worker_addrs) . await ?;
190+
191+ let qp = QueryPlan {
192+ query_id,
193+ session_context : ctx,
194+ worker_addresses : final_workers,
195+ final_stage_id,
196+ schema,
197+ logical_plan,
198+ physical_plan,
199+ distributed_plan,
200+ distributed_tasks : tasks,
201+ } ;
202+
203+ Ok ( qp)
204+ }
175205}
0 commit comments