Skip to content

Commit 258e18c

Browse files
authored
Use SortMergeJoinExec name consistently in physical plan outputs (#19246)
## Which issue does this PR close?

- Closes #19238

## Rationale for this change

`SortMergeJoinExec` is currently displayed inconsistently across physical plan formats; see [joins.slt](https://github.com/apache/datafusion/blob/20870c18a418ec081d44ecf8a90a30a95aa53138/datafusion/sqllogictest/test_files/joins.slt#L2727) vs. [explain_tree.slt](https://github.com/apache/datafusion/blob/20870c18a418ec081d44ecf8a90a30a95aa53138/datafusion/sqllogictest/test_files/explain_tree.slt#L1203). These examples show that the tree-fmt plan uses `SortMergeJoinExec`, while the indent-fmt plan uses `SortMergeJoin`. Standardizing the operator name improves clarity and aligns with the naming conventions of other execution operators.

## What changes are included in this PR?

- Updates the `DisplayAs` implementation for `SortMergeJoinExec` to output `"SortMergeJoinExec: ..."`.
- Updates SQL Logic Test expected outputs in `joins.slt` to reflect the unified naming.
- No functional behavior changes; this is a display/consistency fix.

## Are these changes tested?

Yes. This change is covered by existing SQL Logic Tests. I updated those expected outputs to match the new standardized naming. All tests pass with the updated format.

## Are there any user-facing changes?

Yes—users inspecting physical plans will now consistently see `SortMergeJoinExec` instead of `SortMergeJoin`.
1 parent d0d9311 commit 258e18c

File tree

6 files changed

+50
-49
lines changed

6 files changed

+50
-49
lines changed

datafusion/core/tests/physical_optimizer/enforce_distribution.rs

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1424,8 +1424,8 @@ fn multi_smj_joins() -> Result<()> {
14241424
// Should include 6 RepartitionExecs (3 hash, 3 round-robin), 3 SortExecs
14251425
JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => {
14261426
assert_plan!(plan_distrib, @r"
1427-
SortMergeJoin: join_type=..., on=[(a@0, c@2)]
1428-
SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
1427+
SortMergeJoinExec: join_type=..., on=[(a@0, c@2)]
1428+
SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)]
14291429
SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
14301430
RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
14311431
DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
@@ -1439,20 +1439,20 @@ fn multi_smj_joins() -> Result<()> {
14391439
");
14401440
}
14411441
// Should include 7 RepartitionExecs (4 hash, 3 round-robin), 4 SortExecs
1442-
// Since ordering of the left child is not preserved after SortMergeJoin
1442+
// Since ordering of the left child is not preserved after SortMergeJoinExec
14431443
// when mode is Right, RightSemi, RightAnti, Full
1444-
// - We need to add one additional SortExec after SortMergeJoin in contrast the test cases
1444+
// - We need to add one additional SortExec after SortMergeJoinExec in contrast the test cases
14451445
// when mode is Inner, Left, LeftSemi, LeftAnti
14461446
// Similarly, since partitioning of the left side is not preserved
14471447
// when mode is Right, RightSemi, RightAnti, Full
1448-
// - We need to add one additional Hash Repartition after SortMergeJoin in contrast the test
1448+
// - We need to add one additional Hash Repartition after SortMergeJoinExec in contrast the test
14491449
// cases when mode is Inner, Left, LeftSemi, LeftAnti
14501450
_ => {
14511451
assert_plan!(plan_distrib, @r"
1452-
SortMergeJoin: join_type=..., on=[(a@0, c@2)]
1452+
SortMergeJoinExec: join_type=..., on=[(a@0, c@2)]
14531453
SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
14541454
RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10
1455-
SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
1455+
SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)]
14561456
SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
14571457
RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
14581458
DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
@@ -1474,8 +1474,8 @@ fn multi_smj_joins() -> Result<()> {
14741474
JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => {
14751475
// TODO(wiedld): show different test result if enforce distribution first.
14761476
assert_plan!(plan_sort, @r"
1477-
SortMergeJoin: join_type=..., on=[(a@0, c@2)]
1478-
SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
1477+
SortMergeJoinExec: join_type=..., on=[(a@0, c@2)]
1478+
SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)]
14791479
RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1, maintains_sort_order=true
14801480
SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
14811481
DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
@@ -1489,22 +1489,22 @@ fn multi_smj_joins() -> Result<()> {
14891489
");
14901490
}
14911491
// Should include 8 RepartitionExecs (4 hash, 8 round-robin), 4 SortExecs
1492-
// Since ordering of the left child is not preserved after SortMergeJoin
1492+
// Since ordering of the left child is not preserved after SortMergeJoinExec
14931493
// when mode is Right, RightSemi, RightAnti, Full
1494-
// - We need to add one additional SortExec after SortMergeJoin in contrast the test cases
1494+
// - We need to add one additional SortExec after SortMergeJoinExec in contrast the test cases
14951495
// when mode is Inner, Left, LeftSemi, LeftAnti
14961496
// Similarly, since partitioning of the left side is not preserved
14971497
// when mode is Right, RightSemi, RightAnti, Full
14981498
// - We need to add one additional Hash Repartition and Roundrobin repartition after
1499-
// SortMergeJoin in contrast the test cases when mode is Inner, Left, LeftSemi, LeftAnti
1499+
// SortMergeJoinExec in contrast the test cases when mode is Inner, Left, LeftSemi, LeftAnti
15001500
_ => {
15011501
// TODO(wiedld): show different test result if enforce distribution first.
15021502
assert_plan!(plan_sort, @r"
1503-
SortMergeJoin: join_type=..., on=[(a@0, c@2)]
1503+
SortMergeJoinExec: join_type=..., on=[(a@0, c@2)]
15041504
RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1, maintains_sort_order=true
15051505
SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
15061506
CoalescePartitionsExec
1507-
SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
1507+
SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)]
15081508
RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1, maintains_sort_order=true
15091509
SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
15101510
DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
@@ -1536,8 +1536,8 @@ fn multi_smj_joins() -> Result<()> {
15361536
JoinType::Inner | JoinType::Right => {
15371537
// TODO(wiedld): show different test result if enforce sorting first.
15381538
assert_plan!(plan_distrib, @r"
1539-
SortMergeJoin: join_type=..., on=[(b1@6, c@2)]
1540-
SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
1539+
SortMergeJoinExec: join_type=..., on=[(b1@6, c@2)]
1540+
SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)]
15411541
SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
15421542
RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
15431543
DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
@@ -1554,10 +1554,10 @@ fn multi_smj_joins() -> Result<()> {
15541554
JoinType::Left | JoinType::Full => {
15551555
// TODO(wiedld): show different test result if enforce sorting first.
15561556
assert_plan!(plan_distrib, @r"
1557-
SortMergeJoin: join_type=..., on=[(b1@6, c@2)]
1557+
SortMergeJoinExec: join_type=..., on=[(b1@6, c@2)]
15581558
SortExec: expr=[b1@6 ASC], preserve_partitioning=[true]
15591559
RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10
1560-
SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
1560+
SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)]
15611561
SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
15621562
RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
15631563
DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
@@ -1581,8 +1581,8 @@ fn multi_smj_joins() -> Result<()> {
15811581
JoinType::Inner | JoinType::Right => {
15821582
// TODO(wiedld): show different test result if enforce distribution first.
15831583
assert_plan!(plan_sort, @r"
1584-
SortMergeJoin: join_type=..., on=[(b1@6, c@2)]
1585-
SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
1584+
SortMergeJoinExec: join_type=..., on=[(b1@6, c@2)]
1585+
SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)]
15861586
RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1, maintains_sort_order=true
15871587
SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
15881588
DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
@@ -1599,11 +1599,11 @@ fn multi_smj_joins() -> Result<()> {
15991599
JoinType::Left | JoinType::Full => {
16001600
// TODO(wiedld): show different test result if enforce distribution first.
16011601
assert_plan!(plan_sort, @r"
1602-
SortMergeJoin: join_type=..., on=[(b1@6, c@2)]
1602+
SortMergeJoinExec: join_type=..., on=[(b1@6, c@2)]
16031603
RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=1, maintains_sort_order=true
16041604
SortExec: expr=[b1@6 ASC], preserve_partitioning=[false]
16051605
CoalescePartitionsExec
1606-
SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
1606+
SortMergeJoinExec: join_type=..., on=[(a@0, b1@1)]
16071607
RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1, maintains_sort_order=true
16081608
SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
16091609
DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
@@ -1682,7 +1682,7 @@ fn smj_join_key_ordering() -> Result<()> {
16821682
// Only two RepartitionExecs added
16831683
let plan_distrib = test_config.to_plan(join.clone(), &DISTRIB_DISTRIB_SORT);
16841684
assert_plan!(plan_distrib, @r"
1685-
SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)]
1685+
SortMergeJoinExec: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)]
16861686
SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[true]
16871687
ProjectionExec: expr=[a1@0 as a3, b1@1 as b3]
16881688
ProjectionExec: expr=[a1@1 as a1, b1@0 as b1]
@@ -1703,7 +1703,7 @@ fn smj_join_key_ordering() -> Result<()> {
17031703
// Test: result IS DIFFERENT, if EnforceSorting is run first:
17041704
let plan_sort = test_config.to_plan(join, &SORT_DISTRIB_DISTRIB);
17051705
assert_plan!(plan_sort, @r"
1706-
SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)]
1706+
SortMergeJoinExec: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)]
17071707
RepartitionExec: partitioning=Hash([b3@1, a3@0], 10), input_partitions=1, maintains_sort_order=true
17081708
SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[false]
17091709
CoalescePartitionsExec

datafusion/core/tests/physical_optimizer/enforce_sorting.rs

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1340,12 +1340,12 @@ async fn test_sort_merge_join_order_by_left() -> Result<()> {
13401340
assert_snapshot!(test.run(), @r"
13411341
Input Plan:
13421342
SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]
1343-
SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)]
1343+
SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)]
13441344
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
13451345
DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
13461346
13471347
Optimized Plan:
1348-
SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)]
1348+
SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)]
13491349
SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]
13501350
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
13511351
SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]
@@ -1357,13 +1357,13 @@ async fn test_sort_merge_join_order_by_left() -> Result<()> {
13571357
assert_snapshot!(test.run(), @r"
13581358
Input Plan:
13591359
SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC]
1360-
SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)]
1360+
SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)]
13611361
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
13621362
DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
13631363
13641364
Optimized Plan:
13651365
SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false]
1366-
SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)]
1366+
SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)]
13671367
SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
13681368
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
13691369
SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]
@@ -1430,12 +1430,12 @@ async fn test_sort_merge_join_order_by_right() -> Result<()> {
14301430
assert_snapshot!(test.run(), @r"
14311431
Input Plan:
14321432
SortPreservingMergeExec: [col_a@2 ASC, col_b@3 ASC]
1433-
SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)]
1433+
SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)]
14341434
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
14351435
DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
14361436
14371437
Optimized Plan:
1438-
SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)]
1438+
SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)]
14391439
SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
14401440
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
14411441
SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false]
@@ -1447,12 +1447,12 @@ async fn test_sort_merge_join_order_by_right() -> Result<()> {
14471447
assert_snapshot!(test.run(), @r"
14481448
Input Plan:
14491449
SortPreservingMergeExec: [col_a@0 ASC, col_b@1 ASC]
1450-
SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)]
1450+
SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)]
14511451
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
14521452
DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
14531453
14541454
Optimized Plan:
1455-
SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)]
1455+
SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)]
14561456
SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
14571457
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
14581458
SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false]
@@ -1464,13 +1464,13 @@ async fn test_sort_merge_join_order_by_right() -> Result<()> {
14641464
assert_snapshot!(test.run(), @r"
14651465
Input Plan:
14661466
SortPreservingMergeExec: [col_a@2 ASC, col_b@3 ASC]
1467-
SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)]
1467+
SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)]
14681468
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
14691469
DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
14701470
14711471
Optimized Plan:
14721472
SortExec: expr=[col_a@2 ASC, col_b@3 ASC], preserve_partitioning=[false]
1473-
SortMergeJoin: join_type=..., on=[(nullable_col@0, col_a@0)]
1473+
SortMergeJoinExec: join_type=..., on=[(nullable_col@0, col_a@0)]
14741474
SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
14751475
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
14761476
SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]
@@ -1513,13 +1513,13 @@ async fn test_sort_merge_join_complex_order_by() -> Result<()> {
15131513
assert_snapshot!(test.run(), @r"
15141514
Input Plan:
15151515
SortPreservingMergeExec: [col_b@3 ASC, col_a@2 ASC]
1516-
SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]
1516+
SortMergeJoinExec: join_type=Inner, on=[(nullable_col@0, col_a@0)]
15171517
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
15181518
DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
15191519
15201520
Optimized Plan:
15211521
SortExec: expr=[col_b@3 ASC, nullable_col@0 ASC], preserve_partitioning=[false]
1522-
SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]
1522+
SortMergeJoinExec: join_type=Inner, on=[(nullable_col@0, col_a@0)]
15231523
SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
15241524
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
15251525
SortExec: expr=[col_a@0 ASC], preserve_partitioning=[false]
@@ -1540,12 +1540,12 @@ async fn test_sort_merge_join_complex_order_by() -> Result<()> {
15401540
assert_snapshot!(test.run(), @r"
15411541
Input Plan:
15421542
SortPreservingMergeExec: [nullable_col@0 ASC, col_b@3 ASC, col_a@2 ASC]
1543-
SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]
1543+
SortMergeJoinExec: join_type=Inner, on=[(nullable_col@0, col_a@0)]
15441544
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
15451545
DataSourceExec: file_groups={1 group: [[x]]}, projection=[col_a, col_b], file_type=parquet
15461546
15471547
Optimized Plan:
1548-
SortMergeJoin: join_type=Inner, on=[(nullable_col@0, col_a@0)]
1548+
SortMergeJoinExec: join_type=Inner, on=[(nullable_col@0, col_a@0)]
15491549
SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false]
15501550
DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet
15511551
SortExec: expr=[col_a@0 ASC, col_b@1 ASC], preserve_partitioning=[false]

0 commit comments

Comments
 (0)