1- use super :: { NetworkShuffleExec , PartitionIsolatorExec , Stage } ;
1+ use super :: { NetworkShuffleExec , PartitionIsolatorExec } ;
22use crate :: execution_plans:: { DistributedExec , NetworkCoalesceExec } ;
3+ use crate :: stage:: Stage ;
34use datafusion:: common:: plan_err;
45use datafusion:: common:: tree_node:: TreeNodeRecursion ;
56use datafusion:: datasource:: source:: DataSourceExec ;
@@ -232,19 +233,19 @@ impl DistributedPhysicalOptimizerRule {
232233 } ;
233234
234235 let stage = loop {
235- let ( inner_plan , in_tasks ) = dnode. as_ref ( ) . to_stage_info ( n_tasks) ?;
236+ let input_stage_info = dnode. as_ref ( ) . get_input_stage_info ( n_tasks) ?;
236237 // If the current stage has just 1 task, and the next stage is only going to have
237238 // 1 task, there's no point in having a network boundary in between, they can just
238239 // communicate in memory.
239- if n_tasks == 1 && in_tasks == 1 {
240+ if n_tasks == 1 && input_stage_info . task_count == 1 {
240241 let mut n = dnode. as_ref ( ) . rollback ( ) ?;
241242 if let Some ( node) = n. as_any ( ) . downcast_ref :: < PartitionIsolatorExec > ( ) {
242243 // Also trim PartitionIsolatorExec out of the plan.
243244 n = Arc :: clone ( node. children ( ) . first ( ) . unwrap ( ) ) ;
244245 }
245246 return Ok ( Transformed :: yes ( n) ) ;
246247 }
247- match Self :: _distribute_plan_inner ( query_id, inner_plan . clone ( ) , num, depth + 1 , in_tasks ) {
248+ match Self :: _distribute_plan_inner ( query_id, input_stage_info . plan , num, depth + 1 , input_stage_info . task_count ) {
248249 Ok ( v) => break v,
249250 Err ( e) => match get_distribute_plan_err ( & e) {
250251 None => return Err ( e) ,
@@ -253,7 +254,7 @@ impl DistributedPhysicalOptimizerRule {
253254 // that no more than `limit` tasks can be used for it, so we are going
254255 // to limit the amount of tasks to the requested number and try building
255256 // the stage again.
256- if in_tasks == * limit {
257+ if input_stage_info . task_count == * limit {
257258 return plan_err ! ( "A node requested {limit} tasks for the stage its in, but that stage already has that many tasks" ) ;
258259 }
259260 dnode = Referenced :: Arced ( dnode. as_ref ( ) . with_input_task_count ( * limit) ?) ;
@@ -278,14 +279,27 @@ impl DistributedPhysicalOptimizerRule {
278279 }
279280}
280281
282+ /// Necessary information for building a [Stage] during distributed planning.
283+ ///
284+ /// [NetworkBoundary]s return this piece of data so that the distributed planner knows how to
285+ /// build the next [Stage] from which the [NetworkBoundary] is going to receive data.
286+ ///
287+ /// Some network boundaries might perform some modifications on their children, like scaling
288+ /// up the number of partitions, or injecting a specific [ExecutionPlan] on top.
289+ pub struct InputStageInfo {
290+ /// The head plan of the [Stage] that is about to be built.
291+ pub plan : Arc < dyn ExecutionPlan > ,
292+ /// The number of tasks the [Stage] will have.
293+ pub task_count : usize ,
294+ }
295+
281296/// This trait represents a node that introduces the necessity of a network boundary in the plan.
282297/// The distributed planner, upon stepping into one of these, will break the plan and build a stage
283298/// out of it.
284299pub trait NetworkBoundary : ExecutionPlan {
285- /// Returns the information necessary for building the next stage.
286- /// - The head node of the stage.
287- /// - the amount of tasks that stage will have.
288- fn to_stage_info ( & self , n_tasks : usize ) -> Result < ( Arc < dyn ExecutionPlan > , usize ) > ;
300+ /// Returns the information necessary for building the next stage from which this
301+ /// [NetworkBoundary] is going to collect data.
302+ fn get_input_stage_info ( & self , task_count : usize ) -> Result < InputStageInfo > ;
289303
290304 /// Re-assigns a different number of input tasks to the current [NetworkBoundary].
291305 ///
@@ -295,6 +309,8 @@ pub trait NetworkBoundary: ExecutionPlan {
295309
296310 /// Called when a [Stage] is correctly formed. The [NetworkBoundary] can use this
297311 /// information to perform any internal transformations necessary for distributed execution.
312+ ///
313+ /// Typically, [NetworkBoundary]s will use this call for transitioning from "Pending" to "Ready".
298314 fn with_input_stage ( & self , input_stage : Stage ) -> Result < Arc < dyn ExecutionPlan > > ;
299315
300316 /// Returns the assigned input [Stage], if any.
0 commit comments