15
15
// specific language governing permissions and limitations
16
16
// under the License.
17
17
18
- //! Enforcement optimizer rules are used to make sure the plan's Distribution and Ordering
19
- //! requirements are met by inserting necessary [[ RepartitionExec]] and [[SortExec]].
20
- //!
18
+ //! EnforceDistribution optimizer rule inspects the physical plan with respect
19
+ //! to distribution requirements and adds [RepartitionExec]s to satisfy them
20
+ //! when necessary.
21
21
use crate :: config:: ConfigOptions ;
22
22
use crate :: error:: Result ;
23
- use crate :: physical_optimizer:: utils:: { add_sort_above_child, ordering_satisfy} ;
24
23
use crate :: physical_optimizer:: PhysicalOptimizerRule ;
25
24
use crate :: physical_plan:: aggregates:: { AggregateExec , AggregateMode , PhysicalGroupBy } ;
26
25
use crate :: physical_plan:: coalesce_partitions:: CoalescePartitionsExec ;
@@ -46,25 +45,25 @@ use datafusion_physical_expr::{
46
45
use std:: collections:: HashMap ;
47
46
use std:: sync:: Arc ;
48
47
49
- /// BasicEnforcement rule, it ensures the Distribution and Ordering requirements are met
50
- /// in the strictest way. It might add additional [[ RepartitionExec] ] to the plan tree
48
+ /// The EnforceDistribution rule ensures that distribution requirements are met
49
+ /// in the strictest way. It might add additional [RepartitionExec] to the plan tree
51
50
/// and give a non-optimal plan, but it can avoid the possible data skew in joins.
52
51
///
53
52
/// For example for a HashJoin with keys(a, b, c), the required Distribution(a, b, c) can be satisfied by
54
53
/// several alternative partitioning ways: [(a, b, c), (a, b), (a, c), (b, c), (a), (b), (c), ( )].
55
54
///
56
55
/// This rule only chooses the exact match and satisfies the Distribution(a, b, c) by a HashPartition(a, b, c).
57
56
#[ derive( Default ) ]
58
- pub struct BasicEnforcement { }
57
+ pub struct EnforceDistribution { }
59
58
60
- impl BasicEnforcement {
59
+ impl EnforceDistribution {
61
60
#[ allow( missing_docs) ]
62
61
pub fn new ( ) -> Self {
63
62
Self { }
64
63
}
65
64
}
66
65
67
- impl PhysicalOptimizerRule for BasicEnforcement {
66
+ impl PhysicalOptimizerRule for EnforceDistribution {
68
67
fn optimize (
69
68
& self ,
70
69
plan : Arc < dyn ExecutionPlan > ,
@@ -81,24 +80,21 @@ impl PhysicalOptimizerRule for BasicEnforcement {
81
80
} else {
82
81
plan
83
82
} ;
84
- // Distribution and Ordering enforcement need to be applied bottom-up.
83
+ // Distribution enforcement needs to be applied bottom-up.
85
84
new_plan. transform_up ( & {
86
85
|plan| {
87
86
let adjusted = if !top_down_join_key_reordering {
88
87
reorder_join_keys_to_inputs ( plan) ?
89
88
} else {
90
89
plan
91
90
} ;
92
- Ok ( Some ( ensure_distribution_and_ordering (
93
- adjusted,
94
- target_partitions,
95
- ) ?) )
91
+ Ok ( Some ( ensure_distribution ( adjusted, target_partitions) ?) )
96
92
}
97
93
} )
98
94
}
99
95
100
96
fn name ( & self ) -> & str {
101
- "BasicEnforcement "
97
+ "EnforceDistribution "
102
98
}
103
99
104
100
fn schema_check ( & self ) -> bool {
@@ -829,10 +825,11 @@ fn new_join_conditions(
829
825
new_join_on
830
826
}
831
827
832
- /// Within this function, it checks whether we need to add additional plan operators
833
- /// of data exchanging and data ordering to satisfy the required distribution and ordering.
834
- /// And we should avoid to manually add plan operators of data exchanging and data ordering in other places
835
- fn ensure_distribution_and_ordering (
828
+ /// This function checks whether we need to add additional data exchange
829
+ /// operators to satisfy distribution requirements. Since this function
830
+ /// takes care of such requirements, we should avoid manually adding data
831
+ /// exchange operators in other places.
832
+ fn ensure_distribution (
836
833
plan : Arc < dyn crate :: physical_plan:: ExecutionPlan > ,
837
834
target_partitions : usize ,
838
835
) -> Result < Arc < dyn crate :: physical_plan:: ExecutionPlan > > {
@@ -841,13 +838,11 @@ fn ensure_distribution_and_ordering(
841
838
}
842
839
843
840
let required_input_distributions = plan. required_input_distribution ( ) ;
844
- let required_input_orderings = plan. required_input_ordering ( ) ;
845
841
let children: Vec < Arc < dyn ExecutionPlan > > = plan. children ( ) ;
846
842
assert_eq ! ( children. len( ) , required_input_distributions. len( ) ) ;
847
- assert_eq ! ( children. len( ) , required_input_orderings. len( ) ) ;
848
843
849
844
// Add RepartitionExec to guarantee output partitioning
850
- let children = children
845
+ let new_children : Result < Vec < Arc < dyn ExecutionPlan > > > = children
851
846
. into_iter ( )
852
847
. zip ( required_input_distributions. into_iter ( ) )
853
848
. map ( |( child, required) | {
@@ -870,24 +865,8 @@ fn ensure_distribution_and_ordering(
870
865
} ;
871
866
new_child
872
867
}
873
- } ) ;
874
-
875
- // Add local SortExec to guarantee output ordering within each partition
876
- let new_children: Result < Vec < Arc < dyn ExecutionPlan > > > = children
877
- . zip ( required_input_orderings. into_iter ( ) )
878
- . map ( |( child_result, required) | {
879
- let child = child_result?;
880
- if ordering_satisfy ( child. output_ordering ( ) , required, || {
881
- child. equivalence_properties ( )
882
- } ) {
883
- Ok ( child)
884
- } else {
885
- let sort_expr = required. unwrap ( ) . to_vec ( ) ;
886
- add_sort_above_child ( & child, sort_expr)
887
- }
888
868
} )
889
869
. collect ( ) ;
890
-
891
870
with_new_children_if_necessary ( plan, new_children?)
892
871
}
893
872
@@ -979,6 +958,7 @@ mod tests {
979
958
use super :: * ;
980
959
use crate :: datasource:: listing:: PartitionedFile ;
981
960
use crate :: datasource:: object_store:: ObjectStoreUrl ;
961
+ use crate :: physical_optimizer:: sort_enforcement:: EnforceSorting ;
982
962
use crate :: physical_plan:: aggregates:: {
983
963
AggregateExec , AggregateMode , PhysicalGroupBy ,
984
964
} ;
@@ -1136,8 +1116,15 @@ mod tests {
1136
1116
config. execution. target_partitions = 10 ;
1137
1117
1138
1118
// run optimizer
1139
- let optimizer = BasicEnforcement { } ;
1119
+ let optimizer = EnforceDistribution { } ;
1140
1120
let optimized = optimizer. optimize( $PLAN, & config) ?;
1121
+ // NOTE: These tests verify the joint `EnforceDistribution` + `EnforceSorting` cascade
1122
+ // because they were written prior to the separation of `BasicEnforcement` into
1123
+ // `EnforceSorting` and `EnforceDistribution`.
1124
+ // TODO: Orthogonalize the tests here just to verify `EnforceDistribution` and create
1125
+ // new tests for the cascade.
1126
+ let optimizer = EnforceSorting { } ;
1127
+ let optimized = optimizer. optimize( optimized, & config) ?;
1141
1128
1142
1129
// Now format correctly
1143
1130
let plan = displayable( optimized. as_ref( ) ) . indent( ) . to_string( ) ;
@@ -1656,7 +1643,7 @@ mod tests {
1656
1643
Column :: new_with_schema( "c1" , & right. schema( ) ) . unwrap( ) ,
1657
1644
) ,
1658
1645
] ;
1659
- let bottom_left_join = ensure_distribution_and_ordering (
1646
+ let bottom_left_join = ensure_distribution (
1660
1647
hash_join_exec ( left. clone ( ) , right. clone ( ) , & join_on, & JoinType :: Inner ) ,
1661
1648
10 ,
1662
1649
) ?;
@@ -1686,7 +1673,7 @@ mod tests {
1686
1673
Column :: new_with_schema( "a1" , & right. schema( ) ) . unwrap( ) ,
1687
1674
) ,
1688
1675
] ;
1689
- let bottom_right_join = ensure_distribution_and_ordering (
1676
+ let bottom_right_join = ensure_distribution (
1690
1677
hash_join_exec ( left, right. clone ( ) , & join_on, & JoinType :: Inner ) ,
1691
1678
10 ,
1692
1679
) ?;
@@ -1775,7 +1762,7 @@ mod tests {
1775
1762
Column :: new_with_schema( "b1" , & right. schema( ) ) . unwrap( ) ,
1776
1763
) ,
1777
1764
] ;
1778
- let bottom_left_join = ensure_distribution_and_ordering (
1765
+ let bottom_left_join = ensure_distribution (
1779
1766
hash_join_exec ( left. clone ( ) , right. clone ( ) , & join_on, & JoinType :: Inner ) ,
1780
1767
10 ,
1781
1768
) ?;
@@ -1805,7 +1792,7 @@ mod tests {
1805
1792
Column :: new_with_schema( "a1" , & right. schema( ) ) . unwrap( ) ,
1806
1793
) ,
1807
1794
] ;
1808
- let bottom_right_join = ensure_distribution_and_ordering (
1795
+ let bottom_right_join = ensure_distribution (
1809
1796
hash_join_exec ( left, right. clone ( ) , & join_on, & JoinType :: Inner ) ,
1810
1797
10 ,
1811
1798
) ?;
0 commit comments