@@ -239,6 +239,29 @@ pub trait DistributedExt: Sized {
239239 ///
240240 /// The first one that returns something for a leaf node is the one that decides how many
241241 /// tasks are used.
242+ ///
243+ /// ```text
244+ /// ┌───────────────────────┐
245+ /// │SortPreservingMergeExec│
246+ /// └───────────────────────┘
247+ /// ▲
248+ /// ┌ ─ ─ ─ ─ ─ ─ ─ ┼ ─ ─ ─ ─ ─ ─ ─ ─ Stage 2
249+ /// ┌───────────┴───────────┐ │
250+ /// │ │ SortExec │
251+ /// └───────────────────────┘ │
252+ /// │ ┌───────────────────────┐
253+ /// │ AggregateExec │ │
254+ /// │ └───────────────────────┘
255+ /// ─ ─ ─ ─ ─ ─ ─ ─▲─ ─ ─ ─ ─ ─ ─ ─ ┘
256+ /// ┌ ─ ─ ─ ─ ─ ─ ─ ┴ ─ ─ ─ ─ ─ ─ ─ ─ Stage 1
257+ /// ┌───────────────────────┐ │
258+ /// │ │ FilterExec │
259+ /// └───────────────────────┘ │
260+ /// │ ┌───────────────────────┐ TaskEstimator estimates tasks in
261+ /// │ SomeExec │◀───┼── stages containing leaf nodes
262+ /// │ └───────────────────────┘
263+ /// ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘
264+ /// ```
242265 fn with_distributed_task_estimator < T : TaskEstimator + Send + Sync + ' static > (
243266 self ,
244267 estimator : T ,
@@ -253,6 +276,29 @@ pub trait DistributedExt: Sized {
253276 /// Sets the maximum number of files each task in a stage with a FileScanConfig node will
254277 /// handle. Reducing this number will increment the amount of tasks. By default, this
255278 /// is close to the number of cores in the machine.
279+ ///
280+ /// ```text
281+ /// ┌───────────────────────┐
282+ /// │SortPreservingMergeExec│
283+ /// └───────────────────────┘
284+ /// ▲
285+ /// ┌ ─ ─ ─ ─ ─ ─ ─ ┼ ─ ─ ─ ─ ─ ─ ─ ─ Stage 2
286+ /// ┌───────────┴───────────┐ │
287+ /// │ │ SortExec │
288+ /// └───────────────────────┘ │
289+ /// │ ┌───────────────────────┐
290+ /// │ AggregateExec │ │
291+ /// │ └───────────────────────┘
292+ /// ─ ─ ─ ─ ─ ─ ─ ─▲─ ─ ─ ─ ─ ─ ─ ─ ┘
293+ /// ┌ ─ ─ ─ ─ ─ ─ ─ ┴ ─ ─ ─ ─ ─ ─ ─ ─ Stage 1
294+ /// ┌───────────────────────┐ │
295+ /// │ │ FilterExec │
296+ /// └───────────────────────┘ │
297+ /// │ ┌───────────────────────┐ Sets the max number of files
298+ /// │ FileScanConfig │◀───┼─ each task will handle. Less
299+ /// │ └───────────────────────┘ files_per_task == more tasks
300+ /// ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘
301+ ///```
256302 fn with_distributed_files_per_task (
257303 self ,
258304 files_per_task : usize ,
@@ -277,6 +323,29 @@ pub trait DistributedExt: Sized {
277323 /// This function sets the scale factor for when encountering these nodes that change the
278324 /// cardinality of the data. For example, if a stage with 10 tasks contains an AggregateExec
279325 /// node, and the scale factor is 2.0, the following stage will use 10 / 2.0 = 5 tasks.
326+ ///
327+ /// ```text
328+ /// ┌───────────────────────┐
329+ /// │SortPreservingMergeExec│
330+ /// └───────────────────────┘
331+ /// ▲
332+ /// ┌ ─ ─ ─ ─ ─ ─ ─ ┼ ─ ─ ─ ─ ─ ─ ─ ─ Stage 2 (N/scale_factor tasks)
333+ /// ┌───────────┴───────────┐ │
334+ /// │ │ SortExec │
335+ /// └───────────────────────┘ │
336+ /// │ ┌───────────────────────┐
337+ /// │ AggregateExec │ │
338+ /// │ └───────────────────────┘
339+ /// ─ ─ ─ ─ ─ ─ ─ ─▲─ ─ ─ ─ ─ ─ ─ ─ ┘
340+ /// ┌ ─ ─ ─ ─ ─ ─ ─ ┴ ─ ─ ─ ─ ─ ─ ─ ─ Stage 1 (N tasks)
341+ /// ┌───────────────────────┐ │ A filter reduces cardinality,
342+ /// │ │ FilterExec │◀────────therefore the next stage will have
343+ /// └───────────────────────┘ │ less tasks according to this factor
344+ /// │ ┌───────────────────────┐
345+ /// │ FileScanConfig │ │
346+ /// │ └───────────────────────┘
347+ /// ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘
348+ /// ```
280349 fn with_distributed_cardinality_effect_task_scale_factor (
281350 self ,
282351 factor : f64 ,
0 commit comments