@@ -15,10 +15,64 @@ use crate::task::ExecutionTask;
1515use crate :: ChannelManager ;
1616
1717/// A unit of isolation for a portion of a physical execution plan
18- /// that can be executed independently.
18+ /// that can be executed independently and across a network boundary.
19+ /// It implements [`ExecutionPlan`] and can be executed to produce a
20+ /// stream of record batches.
1921///
20- /// see https://howqueryengineswork.com/13-distributed-query.html
22+ /// An ExecutionTask is a finer grained unit of work compared to an ExecutionStage.
23+ /// One ExecutionStage will create one or more ExecutionTasks
2124///
25+ /// When an [`ExecutionStage`] is execute()'d if will execute its plan and return a stream
26+ /// of record batches.
27+ ///
28+ /// If the stage has input stages, then those input stages will be executed on remote resources
29+ /// and will be provided the remainder of the stage tree.
30+ ///
31+ /// For example if our stage tree looks like this:
32+ ///
33+ /// ```text
34+ /// ┌─────────┐
35+ /// │ stage 1 │
36+ /// └───┬─────┘
37+ /// │
38+ /// ┌──────┴────────┐
39+ /// ┌────┴────┐ ┌────┴────┐
40+ /// │ stage 2 │ │ stage 3 │
41+ /// └────┬────┘ └─────────┘
42+ /// │
43+ /// ┌──────┴────────┐
44+ /// ┌────┴────┐ ┌────┴────┐
45+ /// │ stage 4 │ │ Stage 5 │
46+ /// └─────────┘ └─────────┘
47+ ///
48+ /// ```
49+ ///
50+ /// Then executing Stage 1 will run its plan locally. Stage 1 has two inputs, Stage 2 and Stage 3. We
51+ /// know these will execute on remote resources. As such the plan for Stage 1 must contain an
52+ /// [`ArrowFlightReadExec`] node that will read the results of Stage 2 and Stage 3 and coalese the
53+ /// results.
54+ ///
55+ /// When Stage 1's [`ArrowFlightReadExec`] node is executed, it makes an ArrowFlightRequest to the
56+ /// host assigned in the Stage. It provides the following Stage tree serialilzed in the body of the
57+ /// Arrow Flight Ticket:
58+ ///
59+ /// ```text
60+ /// ┌─────────┐
61+ /// │ Stage 2 │
62+ /// └────┬────┘
63+ /// │
64+ /// ┌──────┴────────┐
65+ /// ┌────┴────┐ ┌────┴────┐
66+ /// │ Stage 4 │ │ Stage 5 │
67+ /// └─────────┘ └─────────┘
68+ ///
69+ /// ```
70+ ///
71+ /// The receiving ArrowFlightEndpoint will then execute Stage 2 and will repeat this process.
72+ ///
73+ /// When Stage 4 is executed, it has no input tasks, so it is assumed that the plan included in that
74+ /// Stage can complete on its own; its likely holding a leaf node in the overall phyysical plan and
75+ /// producing data from a [`DataSourceExec`].
2276#[ derive( Debug , Clone ) ]
2377pub struct ExecutionStage {
2478 /// Our stage number
0 commit comments