Skip to content

Commit de89805

Browse files
committed
WIP: warnings and comments
1 parent 1d84d6f commit de89805

File tree

4 files changed

+16
-15
lines changed

4 files changed

+16
-15
lines changed

datafusion/src/physical_plan/merge.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ impl MergeExec {
6363
&self.input
6464
}
6565

66+
/// Computes output hints appropriate for MergeExec nodes or anything behaving like one.
6667
pub fn output_hints_from_input_hints(input: &dyn ExecutionPlan) -> OptimizerHints {
6768
let input_hints = input.output_hints();
6869
let sort_order;

datafusion/src/physical_plan/mod.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -133,15 +133,15 @@ pub struct OptimizerHints {
133133
/// the values may end up in ascending or descending order, nulls can go first or last.
134134
pub sort_order: Option<Vec<usize>>,
135135

136-
// Describes the sawtoothing runs of the stream that is partially sorted. If sort_order is
137-
// present, the first element of this should be sort_order.unwrap(). If we take a sorted stream
138-
// and add a projection that removes a column in the middle of sort_order, and it isn't a single
139-
// value column, approximate_sort_order.len() would be 2, and it would be the input's sort order
140-
// split on the missing column.
141-
//
142-
// However, this is free to have jumps outside of the sort order. We might have a MergeNode
143-
// which retains the approximate_sort_order optimizer hint despite merging stuff out of order.
144-
// The approximate sort order is more "statistical" in nature.
136+
/// Describes the sawtoothing runs of the stream that is partially sorted. If sort_order is
137+
/// present, the first element of this should be sort_order.unwrap(). If we take a sorted stream
138+
/// and add a projection that removes a column in the middle of sort_order, and it isn't a single
139+
/// value column, approximate_sort_order.len() would be 2, and it would be the input's sort order
140+
/// split on the missing column.
141+
///
142+
/// However, this is free to have jumps outside of the sort order. We might have a MergeNode
143+
/// which retains the approximate_sort_order optimizer hint despite merging data out of order.
144+
/// The approximate sort order is more "statistical" in nature.
145145
pub approximate_sort_order: Vec<Vec<usize>>,
146146
/// True if the sort order has no jumps other than those permitted by approximate_sort_order.
147147
/// This means that the ordering represents a truly sorted order with some columns missing.

datafusion/src/physical_plan/planner.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1746,8 +1746,8 @@ impl SortednessByGroupKey {
17461746
/// Checks the degree to which input is sortable by a group key. If it succeeds, returns clumps of
17471747
/// effectively adjacent sort key columns. For example, if the input's sort key is (A, B, S, C, D,
17481748
/// E, F, G, H, I, J), and S is a single value column, and the group keys are for Column values C,
1749-
/// E, F, I, B, and K, then this function will return {sort_order: [[(1, B), (3, C)], [(5, E), (6,
1750-
/// F)], [(9, I)]], unsorted: [K], succeeded: true}.
1749+
/// E, F, I, B, and K, then this function will return {sort_order: [[#B, #C], [#E, #F], [#I]],
1750+
/// unsorted: [#K], succeeded: true}, where #X is the offset of column X in the group key.
17511751
pub fn input_sortedness_by_group_key(
17521752
input: &dyn ExecutionPlan,
17531753
group_key: &[(Arc<dyn PhysicalExpr>, String)],
@@ -1826,6 +1826,7 @@ pub fn input_sortedness_by_group_key(
18261826
}
18271827
}
18281828

1829+
/// Computes input_sortedness_by_group_key using approximate sorting information.
18291830
pub fn input_sortedness_by_group_key_using_approximate(
18301831
input: &dyn ExecutionPlan,
18311832
group_key: &[(Arc<dyn PhysicalExpr>, String)],
@@ -1853,6 +1854,8 @@ pub fn input_sortedness_by_group_key_using_approximate(
18531854
input_to_group[input_col] = Some(group_i);
18541855
}
18551856

1857+
// This is practically a copy/paste of ProjectionExec's output_hints code, except for the
1858+
// group_key_used tracking. TODO: consider combining the two into a shared helper.
18561859
let mut group_key_used = vec![false; group_key.len()];
18571860
let mut prefix_maintained = None::<bool>;
18581861
let mut approximate_sort_order = Vec::new();
@@ -1884,11 +1887,9 @@ pub fn input_sortedness_by_group_key_using_approximate(
18841887
}
18851888
if !out_segment.is_empty() {
18861889
approximate_sort_order.push(out_segment);
1887-
out_segment = Vec::new();
18881890
}
18891891
}
18901892

1891-
let approximate_sort_order_is_strict = hints.approximate_sort_order_is_strict;
18921893
let approximate_sort_order_is_prefix = hints.approximate_sort_order_is_prefix && prefix_maintained == Some(true);
18931894
let mut unsorted = Vec::<usize>::new();
18941895
for (group_i, key_used) in group_key_used.into_iter().enumerate() {
@@ -2260,7 +2261,7 @@ mod tests {
22602261
assert!(sortedness.succeeded);
22612262
assert_eq!(
22622263
sortedness.sort_order,
2263-
vec![vec![(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]]
2264+
vec![vec![0, 1, 2, 3, 4]]
22642265
);
22652266
assert_eq!(sortedness.unsorted, vec![] as Vec<usize>);
22662267
assert_eq!(sortedness.detached_from_prefix, false);

datafusion/src/physical_plan/projection.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,6 @@ impl ExecutionPlan for ProjectionExec {
206206

207207
if !out_segment.is_empty() {
208208
approximate_sort_order.push(out_segment);
209-
out_segment = Vec::new();
210209
}
211210
}
212211
let approximate_sort_order_is_strict = input_hints.approximate_sort_order_is_strict;

0 commit comments

Comments
 (0)