Skip to content

Commit a68563b

Browse files
doc: Document caveats of swap_inputs() interface in join executors (#17373)
* doc caveats of `swap_inputs()` interface in join executors * fix ci * Update datafusion/physical-plan/src/joins/hash_join/exec.rs Co-authored-by: Jonathan Chen <[email protected]> --------- Co-authored-by: Jonathan Chen <[email protected]>
1 parent 71d3d29 commit a68563b

File tree

4 files changed

+32
-0
lines changed

4 files changed

+32
-0
lines changed

datafusion/physical-plan/src/joins/cross_join.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,12 @@ impl CrossJoinExec {
175175
/// Returns a new `ExecutionPlan` that computes the same join as this one,
176176
/// with the left and right inputs swapped using the specified
177177
/// `partition_mode`.
178+
///
179+
/// # Notes:
180+
///
181+
/// This function should be called BEFORE inserting any repartitioning
182+
/// operators on the join's children. Check [`super::HashJoinExec::swap_inputs`]
183+
/// for more details.
178184
pub fn swap_inputs(&self) -> Result<Arc<dyn ExecutionPlan>> {
179185
let new_join =
180186
CrossJoinExec::new(Arc::clone(&self.right), Arc::clone(&self.left));

datafusion/physical-plan/src/joins/hash_join/exec.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -638,6 +638,21 @@ impl HashJoinExec {
638638
///
639639
/// This function is public so other downstream projects can use it to
640640
/// construct `HashJoinExec` with right side as the build side.
641+
///
642+
/// For using this interface directly, please refer to below:
643+
///
644+
/// Hash join execution may require specific input partitioning (for example,
645+
/// the left child may have a single partition while the right child has multiple).
646+
///
647+
/// Calling this function on join nodes whose children have already been repartitioned
648+
/// (e.g., after a `RepartitionExec` has been inserted) may break the partitioning
649+
/// requirements of the hash join. Therefore, ensure you call this function
650+
/// before inserting any repartitioning operators on the join's children.
651+
///
652+
/// In DataFusion's default SQL interface, this function is used by the `JoinSelection`
653+
/// physical optimizer rule to determine a good join order, which is
654+
/// executed before the `EnforceDistribution` rule (the rule that may
655+
/// insert `RepartitionExec` operators).
641656
pub fn swap_inputs(
642657
&self,
643658
partition_mode: PartitionMode,

datafusion/physical-plan/src/joins/nested_loop_join.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,12 @@ impl NestedLoopJoinExec {
348348

349349
/// Returns a new `ExecutionPlan` that runs NestedLoopsJoins with the left
350350
/// and right inputs swapped.
351+
///
352+
/// # Notes:
353+
///
354+
/// This function should be called BEFORE inserting any repartitioning
355+
/// operators on the join's children. Check [`super::HashJoinExec::swap_inputs`]
356+
/// for more details.
351357
pub fn swap_inputs(&self) -> Result<Arc<dyn ExecutionPlan>> {
352358
let left = self.left();
353359
let right = self.right();

datafusion/physical-plan/src/joins/sort_merge_join/exec.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,11 @@ impl SortMergeJoinExec {
304304
))
305305
}
306306

307+
/// # Notes:
308+
///
309+
/// This function should be called BEFORE inserting any repartitioning
310+
/// operators on the join's children. Check [`super::super::HashJoinExec::swap_inputs`]
311+
/// for more details.
307312
pub fn swap_inputs(&self) -> Result<Arc<dyn ExecutionPlan>> {
308313
let left = self.left();
309314
let right = self.right();

0 commit comments

Comments
 (0)