Skip to content

Commit bc7434b

Browse files
committed
Add MergeSortExec approximate sort order hints
1 parent 5b5d46d commit bc7434b

File tree

3 files changed

+100
-18
lines changed

3 files changed

+100
-18
lines changed

datafusion/src/physical_plan/merge_sort.rs

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ use arrow::compute::{
3333
use arrow::datatypes::SchemaRef;
3434
use arrow::error::Result as ArrowResult;
3535
use arrow::record_batch::RecordBatch;
36+
use itertools::Itertools;
3637

3738
use super::{RecordBatchStream, SendableRecordBatchStream};
3839
use crate::error::{DataFusionError, Result};
@@ -101,9 +102,77 @@ impl ExecutionPlan for MergeSortExec {
101102
}
102103

103104
fn output_hints(&self) -> OptimizerHints {
105+
// We do want to retain approximate sorting information. Note that the sorting algorithm's
106+
// index field in struct Key<'a> makes us see that each input stream's unused sort keys
107+
// result in sawtoothed runs.
108+
109+
// For example, if the input streams are sorted by columns A, B, C, D, E, and the sort key
110+
// is A, B, C, then we want the approximate_sort_order to be [[A, B, C], [D, E]], because
111+
// for a given value under ABC, the sort order will have multiple increasing (sawtoothing)
112+
// runs of columns DE the way the input streams get merged (due to the index field usage in
113+
// struct Key<'a>).
114+
115+
let mut hints: OptimizerHints = self.input.output_hints();
116+
let sort_order: Vec<usize> = self.columns.iter().map(|c| c.index()).collect();
117+
118+
'fallback: {
119+
if !hints.approximate_sort_order_is_prefix || hints.approximate_sort_order.is_empty() {
120+
break 'fallback;
121+
}
122+
let first_seg: &Vec<usize> = &hints.approximate_sort_order[0];
123+
124+
let mut sort_order_index: usize = 0;
125+
let mut approx_index: usize = 0;
126+
while sort_order_index < sort_order.len() {
127+
if first_seg[approx_index] == sort_order[sort_order_index] {
128+
sort_order_index += 1;
129+
approx_index += 1;
130+
if approx_index == first_seg.len() {
131+
break;
132+
}
133+
} else if hints.single_value_columns.contains(&first_seg[approx_index]) {
134+
approx_index += 1;
135+
if approx_index == first_seg.len() {
136+
break;
137+
}
138+
} else if hints.single_value_columns.contains(&sort_order[sort_order_index]) {
139+
sort_order_index += 1;
140+
} else {
141+
// This should not happen.
142+
break 'fallback;
143+
}
144+
}
145+
146+
if approx_index > 0 {
147+
if approx_index != first_seg.len() {
148+
let second_seg = first_seg[approx_index..].iter().map(|&x| x).collect_vec();
149+
hints.approximate_sort_order.insert(1, second_seg);
150+
hints.approximate_sort_order[0].truncate(approx_index);
151+
} else {
152+
// It would be weird if sort_order_index is not equal to sort_order.len() --
153+
// another instance of single value columns (we hope).
154+
155+
// Nothing to do here.
156+
}
157+
hints.approximate_sort_order_is_prefix = true;
158+
} else {
159+
// approx_index == 0
160+
161+
// It's possible we sorted by some single value column, and this means subsequent
162+
// columns are sawtoothing in separate columns. Or is it? Either the input_hints's
163+
// sort_order is inconsistent with the approximate_sort_order, or we have some
164+
// particular treatment of single_value_columns in different code deciding whether
165+
// we can use a MergeExec node, that leads to this case.
166+
hints.approximate_sort_order_is_prefix = false;
167+
}
168+
169+
return hints;
170+
171+
}
172+
104173
OptimizerHints::new_sorted(
105-
Some(self.columns.iter().map(|c| c.index()).collect()),
106-
self.input.output_hints().single_value_columns,
174+
Some(sort_order),
175+
hints.single_value_columns,
107176
)
108177
}
109178

@@ -616,7 +685,6 @@ impl ExecutionPlan for LastRowByUniqueKeyExec {
616685
}
617686

618687
fn output_hints(&self) -> OptimizerHints {
619-
// Possibly, this is abandoning approximate sort order information.
620688
let input_hints = self.input.output_hints();
621689
OptimizerHints::new_sorted(
622690
input_hints.sort_order,

datafusion/src/physical_plan/planner.rs

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1879,8 +1879,6 @@ pub fn input_sortedness_by_group_key_using_approximate(
18791879
prefix_maintained = Some(false);
18801880
}
18811881
}
1882-
1883-
break;
18841882
}
18851883
if prefix_maintained.is_none() {
18861884
prefix_maintained = Some(false);
@@ -1901,7 +1899,7 @@ pub fn input_sortedness_by_group_key_using_approximate(
19011899
SortednessByGroupKey {
19021900
sort_order: approximate_sort_order,
19031901
unsorted,
1904-
detached_from_prefix: approximate_sort_order_is_prefix,
1902+
detached_from_prefix: !approximate_sort_order_is_prefix,
19051903
succeeded: true,
19061904
}
19071905
}
@@ -2256,16 +2254,34 @@ mod tests {
22562254
physical_group_key.push((phys_expr, "".to_owned()));
22572255
}
22582256

2259-
let sortedness =
2260-
input_sortedness_by_group_key(execution_plan.as_ref(), &physical_group_key);
2261-
assert!(sortedness.succeeded);
2262-
assert_eq!(
2263-
sortedness.sort_order,
2264-
vec![vec![0, 1, 2, 3, 4]]
2265-
);
2266-
assert_eq!(sortedness.unsorted, vec![] as Vec<usize>);
2267-
assert_eq!(sortedness.detached_from_prefix, false);
2268-
assert!(sortedness.is_sorted_by_group_key());
2257+
{
2258+
let sortedness =
2259+
input_sortedness_by_group_key(execution_plan.as_ref(), &physical_group_key);
2260+
assert!(sortedness.succeeded);
2261+
assert_eq!(
2262+
sortedness.sort_order,
2263+
vec![vec![0, 1, 2, 3, 4]]
2264+
);
2265+
assert_eq!(sortedness.unsorted, vec![] as Vec<usize>);
2266+
assert_eq!(sortedness.detached_from_prefix, false);
2267+
assert!(sortedness.is_sorted_by_group_key());
2268+
}
2269+
2270+
{
2271+
let sortedness =
2272+
input_sortedness_by_group_key_using_approximate(execution_plan.as_ref(), &physical_group_key);
2273+
assert!(sortedness.succeeded, "using_approximate");
2274+
assert_eq!(
2275+
sortedness.sort_order,
2276+
vec![vec![0, 1, 2, 3, 4]],
2277+
"using_approximate"
2278+
);
2279+
assert_eq!(sortedness.unsorted, vec![] as Vec<usize>, "using_approximate");
2280+
assert_eq!(sortedness.detached_from_prefix, false, "using_approximate");
2281+
assert!(sortedness.is_sorted_by_group_key(), "using_approximate");
2282+
}
2283+
2284+
22692285

22702286
Ok(())
22712287
}

datafusion/src/physical_plan/projection.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,6 @@ impl ExecutionPlan for ProjectionExec {
195195
if prefix_maintained.is_none() {
196196
prefix_maintained = Some(false);
197197
}
198-
199-
break;
200198
}
201199
}
202200
if prefix_maintained.is_none() {

0 commit comments

Comments
 (0)