@@ -5,6 +5,7 @@ use datafusion::config::ConfigOptions;
55use datafusion:: datasource:: physical_plan:: FileScanConfig ;
66use datafusion:: physical_plan:: ExecutionPlan ;
77use datafusion:: prelude:: SessionConfig ;
8+ use delegate:: delegate;
89use std:: collections:: HashSet ;
910use std:: fmt:: Debug ;
1011use std:: sync:: Arc ;
@@ -58,6 +59,34 @@ pub struct TaskEstimation {
5859 pub task_count : TaskCountAnnotation ,
5960}
6061
62+ impl TaskEstimation {
63+ /// Tells the distributed planner that the evaluated stage can have **at maximum** the provided
64+ /// number of tasks, setting a hard upper limit.
65+ ///
66+ /// Returning `TaskEstimation::maximum(1)` tells the distributed planner that the evaluated
67+ /// stage cannot be distributed.
68+ ///
69+ /// Even if a `TaskEstimation::maximum(N)` is provided, any other node in the same stage
70+ /// providing a value of `TaskEstimation::maximum(M)` where `M` < `N` takes precedence.
71+ pub fn maximum ( value : usize ) -> Self {
72+ TaskEstimation {
73+ task_count : TaskCountAnnotation :: Maximum ( value) ,
74+ }
75+ }
76+
77+ /// Tells the distributed planner that the evaluated stage can **optimally** have the provided
78+ /// number of tasks, setting a soft task count hint that can be overridden by others.
79+ ///
80+ /// The provided `TaskEstimation::desired(N)` can be overridden by:
81+ /// - Other nodes providing a `TaskEstimation::desired(M)` where `M` > `N`.
82+ /// - Any other node providing a `TaskEstimation::maximum(M)` where `M` can be anything.
83+ pub fn desired ( value : usize ) -> Self {
84+ TaskEstimation {
85+ task_count : TaskCountAnnotation :: Desired ( value) ,
86+ }
87+ }
88+ }
89+
6190/// Given a leaf node, provides an estimation about how many tasks should be used in the
6291/// stage containing it, and if the leaf node should be replaced by some other.
6392///
@@ -66,14 +95,19 @@ pub struct TaskEstimation {
6695/// count calculated based on whether lower stages are reducing the cardinality of the data
6796/// or increasing it.
6897pub trait TaskEstimator {
69- /// Function applied to leaf nodes that returns a [TaskEstimation] hinting how many
70- /// tasks should be used in the [Stage] containing that leaf node.
98+ /// Function applied to each node that returns a [TaskEstimation] hinting how many
99+ /// tasks should be used in the [Stage] containing that node.
100+ ///
101+ /// All the [TaskEstimator] registered in the session will be applied to the node
102+ /// until one returns an estimation.
103+ ///
71104 ///
72- /// All the [TaskEstimator] registered in the session will be applied to the leaf node
73- /// until one returns an estimation. If no estimation is return from any of the
74- /// [TaskEstimator]s, then `Maximum(1)` is returned, hinting the distributed planner to not
75- /// distribute the stage containing that node.
76- fn tasks_for_leaf_node (
105+ /// If no estimation is returned from any of the registered [TaskEstimator]s, then:
106+ /// - If the node is a leaf node, `Maximum(1)` is assumed, hinting the distributed planner
107+ /// that the leaf node cannot be distributed across tasks.
108+ /// - If the node is a normal node in the plan, then the maximum task count from its children
109+ /// is inherited.
110+ fn task_estimation (
77111 & self ,
78112 plan : & Arc < dyn ExecutionPlan > ,
79113 cfg : & ConfigOptions ,
@@ -91,14 +125,18 @@ pub trait TaskEstimator {
91125}
92126
93127impl TaskEstimator for usize {
94- fn tasks_for_leaf_node (
128+ fn task_estimation (
95129 & self ,
96- _ : & Arc < dyn ExecutionPlan > ,
130+ inputs : & Arc < dyn ExecutionPlan > ,
97131 _: & ConfigOptions ,
98132 ) -> Option < TaskEstimation > {
99- Some ( TaskEstimation {
100- task_count : TaskCountAnnotation :: Desired ( * self ) ,
101- } )
133+ if inputs. children ( ) . is_empty ( ) {
134+ Some ( TaskEstimation {
135+ task_count : TaskCountAnnotation :: Desired ( * self ) ,
136+ } )
137+ } else {
138+ None
139+ }
102140 }
103141
104142 fn scale_up_leaf_node (
@@ -112,40 +150,20 @@ impl TaskEstimator for usize {
112150}
113151
114152impl TaskEstimator for Arc < dyn TaskEstimator > {
115- fn tasks_for_leaf_node (
116- & self ,
117- plan : & Arc < dyn ExecutionPlan > ,
118- cfg : & ConfigOptions ,
119- ) -> Option < TaskEstimation > {
120- self . as_ref ( ) . tasks_for_leaf_node ( plan, cfg)
121- }
122-
123- fn scale_up_leaf_node (
124- & self ,
125- plan : & Arc < dyn ExecutionPlan > ,
126- task_count : usize ,
127- cfg : & ConfigOptions ,
128- ) -> Option < Arc < dyn ExecutionPlan > > {
129- self . as_ref ( ) . scale_up_leaf_node ( plan, task_count, cfg)
153+ delegate ! {
154+ to self . as_ref( ) {
155+ fn task_estimation( & self , plan: & Arc <dyn ExecutionPlan >, cfg: & ConfigOptions ) -> Option <TaskEstimation >;
156+ fn scale_up_leaf_node( & self , plan: & Arc <dyn ExecutionPlan >, task_count: usize , cfg: & ConfigOptions ) -> Option <Arc <dyn ExecutionPlan >>;
157+ }
130158 }
131159}
132160
133161impl TaskEstimator for Arc < dyn TaskEstimator + Send + Sync > {
134- fn tasks_for_leaf_node (
135- & self ,
136- plan : & Arc < dyn ExecutionPlan > ,
137- cfg : & ConfigOptions ,
138- ) -> Option < TaskEstimation > {
139- self . as_ref ( ) . tasks_for_leaf_node ( plan, cfg)
140- }
141-
142- fn scale_up_leaf_node (
143- & self ,
144- plan : & Arc < dyn ExecutionPlan > ,
145- task_count : usize ,
146- cfg : & ConfigOptions ,
147- ) -> Option < Arc < dyn ExecutionPlan > > {
148- self . as_ref ( ) . scale_up_leaf_node ( plan, task_count, cfg)
162+ delegate ! {
163+ to self . as_ref( ) {
164+ fn task_estimation( & self , plan: & Arc <dyn ExecutionPlan >, cfg: & ConfigOptions ) -> Option <TaskEstimation >;
165+ fn scale_up_leaf_node( & self , plan: & Arc <dyn ExecutionPlan >, task_count: usize , cfg: & ConfigOptions ) -> Option <Arc <dyn ExecutionPlan >>;
166+ }
149167 }
150168}
151169
@@ -177,15 +195,16 @@ pub(crate) fn set_distributed_task_estimator(
177195struct FileScanConfigTaskEstimator ;
178196
179197impl TaskEstimator for FileScanConfigTaskEstimator {
180- fn tasks_for_leaf_node (
198+ fn task_estimation (
181199 & self ,
182200 plan : & Arc < dyn ExecutionPlan > ,
183201 cfg : & ConfigOptions ,
184202 ) -> Option < TaskEstimation > {
185- let d_cfg = cfg. extensions . get :: < DistributedConfig > ( ) ?;
186203 let dse: & DataSourceExec = plan. as_any ( ) . downcast_ref ( ) ?;
187204 let file_scan: & FileScanConfig = dse. data_source ( ) . as_any ( ) . downcast_ref ( ) ?;
188205
206+ let d_cfg = cfg. extensions . get :: < DistributedConfig > ( ) ?;
207+
189208 // Count how many distinct files we have in the FileScanConfig. Each file in each
190209 // file group is a PartitionedFile rather than a full file, so it's possible that
191210 // many entries refer to different chunks of the same physical file. By keeping a
@@ -244,21 +263,21 @@ pub(crate) struct CombinedTaskEstimator {
244263}
245264
246265impl TaskEstimator for CombinedTaskEstimator {
247- fn tasks_for_leaf_node (
266+ fn task_estimation (
248267 & self ,
249268 plan : & Arc < dyn ExecutionPlan > ,
250269 cfg : & ConfigOptions ,
251270 ) -> Option < TaskEstimation > {
252271 for estimator in & self . user_provided {
253- if let Some ( result) = estimator. tasks_for_leaf_node ( plan, cfg) {
272+ if let Some ( result) = estimator. task_estimation ( plan, cfg) {
254273 return Some ( result) ;
255274 }
256275 }
257276 // We want to execute the default estimators last so that the user-provided ones have
258277 // a chance of providing an estimation.
259278 // If none of the user-provided returned an estimation, the default ones are used.
260279 for default_estimator in [ & FileScanConfigTaskEstimator as & dyn TaskEstimator ] {
261- if let Some ( result) = default_estimator. tasks_for_leaf_node ( plan, cfg) {
280+ if let Some ( result) = default_estimator. task_estimation ( plan, cfg) {
262281 return Some ( result) ;
263282 }
264283 }
@@ -348,7 +367,7 @@ mod tests {
348367 ..Default :: default ( )
349368 } ;
350369 cfg. extensions . insert ( f ( d_cfg) ) ;
351- self . tasks_for_leaf_node ( & node, & cfg)
370+ self . task_estimation ( & node, & cfg)
352371 . unwrap ( )
353372 . task_count
354373 . as_usize ( )
@@ -370,7 +389,7 @@ mod tests {
370389 }
371390
372391 impl < F : Fn ( & Arc < dyn ExecutionPlan > , & ConfigOptions ) -> Option < TaskEstimation > > TaskEstimator for F {
373- fn tasks_for_leaf_node (
392+ fn task_estimation (
374393 & self ,
375394 plan : & Arc < dyn ExecutionPlan > ,
376395 cfg : & ConfigOptions ,
0 commit comments