Skip to content

Commit 4ef18bd

Browse files
Fix: Show aliased aggregate expressions in physical EXPLAIN output (#19685)
When an aggregate expression has been aliased, the logical plan EXPLAIN shows both the alias and the original expression. However, the physical plan EXPLAIN showed only the alias, making plans hard to interpret.

This fix updates the physical EXPLAIN output to show both the underlying aggregate expression and its alias, in the format:

    AggregateExec: mode=Single, gby=[], aggr=[sum(column1@0) as my_alias]

instead of:

    AggregateExec: mode=Single, gby=[], aggr=[my_alias]

Changes:
- Modified create_aggregate_expr_and_maybe_filter() in physical_planner.rs to use the unaliased expression for human_display, so it captures the actual aggregate expression instead of just the alias name.
- Modified the DisplayAs impl for AggregateExec to show both the expression and the alias when they differ.
- Updated test expectations in explain.slt, aggregate.slt, and agg_func_substitute.slt to reflect the new output format.
1 parent 209a0a2 commit 4ef18bd

File tree

5 files changed

+62
-16
lines changed

5 files changed

+62
-16
lines changed

datafusion/core/src/physical_planner.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2189,7 +2189,11 @@ pub fn create_aggregate_expr_and_maybe_filter(
21892189
let (name, human_display, e) = match e {
21902190
Expr::Alias(Alias { name, .. }) => {
21912191
let unaliased = e.clone().unalias_nested().data;
2192-
(Some(name.clone()), e.human_display().to_string(), unaliased)
2192+
// Use the unaliased expression for human_display so that the
2193+
// physical plan EXPLAIN shows the actual aggregate expression,
2194+
// not just the alias name
2195+
let display = unaliased.human_display().to_string();
2196+
(Some(name.clone()), display, unaliased)
21932197
}
21942198
Expr::AggregateFunction(_) => (
21952199
Some(e.schema_name().to_string()),

datafusion/physical-plan/src/aggregates/mod.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1116,7 +1116,16 @@ impl DisplayAs for AggregateExec {
11161116
let a: Vec<String> = self
11171117
.aggr_expr
11181118
.iter()
1119-
.map(|agg| agg.name().to_string())
1119+
.map(|agg| {
1120+
let expr_display = agg.human_display();
1121+
let alias = agg.name();
1122+
// Show the expression with alias if they differ
1123+
if !expr_display.is_empty() && expr_display != alias {
1124+
format!("{expr_display} as {alias}")
1125+
} else {
1126+
alias.to_string()
1127+
}
1128+
})
11201129
.collect();
11211130
write!(f, ", aggr=[{}]", a.join(", "))?;
11221131
if let Some(limit) = self.limit {

datafusion/sqllogictest/test_files/agg_func_substitute.slt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,10 @@ logical_plan
4444
03)----TableScan: multiple_ordered_table projection=[a, c]
4545
physical_plan
4646
01)ProjectionExec: expr=[a@0 as a, nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]@1 as result]
47-
02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
47+
02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c, 1) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] as nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
4848
03)----SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]
4949
04)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4
50-
05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
50+
05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c, 1) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] as nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
5151
06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, maintains_sort_order=true
5252
07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_orderings=[[a@0 ASC NULLS LAST], [c@1 ASC NULLS LAST]], file_type=csv, has_header=true
5353

@@ -63,10 +63,10 @@ logical_plan
6363
03)----TableScan: multiple_ordered_table projection=[a, c]
6464
physical_plan
6565
01)ProjectionExec: expr=[a@0 as a, nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]@1 as result]
66-
02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
66+
02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c, 1) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] as nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
6767
03)----SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]
6868
04)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4
69-
05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
69+
05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c, 1) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] as nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
7070
06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, maintains_sort_order=true
7171
07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_orderings=[[a@0 ASC NULLS LAST], [c@1 ASC NULLS LAST]], file_type=csv, has_header=true
7272

@@ -81,10 +81,10 @@ logical_plan
8181
03)----TableScan: multiple_ordered_table projection=[a, c]
8282
physical_plan
8383
01)ProjectionExec: expr=[a@0 as a, nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]@1 as result]
84-
02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
84+
02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c, 101) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] as nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
8585
03)----SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]
8686
04)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4
87-
05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
87+
05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c, 101) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] as nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
8888
06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, maintains_sort_order=true
8989
07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_orderings=[[a@0 ASC NULLS LAST], [c@1 ASC NULLS LAST]], file_type=csv, has_header=true
9090

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3647,9 +3647,9 @@ logical_plan
36473647
01)Aggregate: groupBy=[[]], aggr=[[min(CAST(aggregate_test_100.c2 AS Float64)) AS percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 ASC NULLS LAST]]]
36483648
02)--TableScan: aggregate_test_100 projection=[c2]
36493649
physical_plan
3650-
01)AggregateExec: mode=Final, gby=[], aggr=[percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 ASC NULLS LAST]]
3650+
01)AggregateExec: mode=Final, gby=[], aggr=[min(aggregate_test_100.c2) as percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 ASC NULLS LAST]]
36513651
02)--CoalescePartitionsExec
3652-
03)----AggregateExec: mode=Partial, gby=[], aggr=[percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 ASC NULLS LAST]]
3652+
03)----AggregateExec: mode=Partial, gby=[], aggr=[min(aggregate_test_100.c2) as percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 ASC NULLS LAST]]
36533653
04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
36543654
05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[c2], file_type=csv, has_header=true
36553655

@@ -3660,9 +3660,9 @@ logical_plan
36603660
01)Aggregate: groupBy=[[]], aggr=[[max(CAST(aggregate_test_100.c2 AS Float64)) AS percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 DESC NULLS FIRST]]]
36613661
02)--TableScan: aggregate_test_100 projection=[c2]
36623662
physical_plan
3663-
01)AggregateExec: mode=Final, gby=[], aggr=[percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 DESC NULLS FIRST]]
3663+
01)AggregateExec: mode=Final, gby=[], aggr=[max(aggregate_test_100.c2) as percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 DESC NULLS FIRST]]
36643664
02)--CoalescePartitionsExec
3665-
03)----AggregateExec: mode=Partial, gby=[], aggr=[percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 DESC NULLS FIRST]]
3665+
03)----AggregateExec: mode=Partial, gby=[], aggr=[max(aggregate_test_100.c2) as percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 DESC NULLS FIRST]]
36663666
04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
36673667
05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[c2], file_type=csv, has_header=true
36683668

@@ -3673,9 +3673,9 @@ logical_plan
36733673
01)Aggregate: groupBy=[[]], aggr=[[min(CAST(aggregate_test_100.c2 AS Float64)) AS percentile_cont(aggregate_test_100.c2,Float64(0))]]
36743674
02)--TableScan: aggregate_test_100 projection=[c2]
36753675
physical_plan
3676-
01)AggregateExec: mode=Final, gby=[], aggr=[percentile_cont(aggregate_test_100.c2,Float64(0))]
3676+
01)AggregateExec: mode=Final, gby=[], aggr=[min(aggregate_test_100.c2) as percentile_cont(aggregate_test_100.c2,Float64(0))]
36773677
02)--CoalescePartitionsExec
3678-
03)----AggregateExec: mode=Partial, gby=[], aggr=[percentile_cont(aggregate_test_100.c2,Float64(0))]
3678+
03)----AggregateExec: mode=Partial, gby=[], aggr=[min(aggregate_test_100.c2) as percentile_cont(aggregate_test_100.c2,Float64(0))]
36793679
04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
36803680
05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[c2], file_type=csv, has_header=true
36813681

@@ -3686,9 +3686,9 @@ logical_plan
36863686
01)Aggregate: groupBy=[[]], aggr=[[max(CAST(aggregate_test_100.c2 AS Float64)) AS percentile_cont(aggregate_test_100.c2,Float64(1))]]
36873687
02)--TableScan: aggregate_test_100 projection=[c2]
36883688
physical_plan
3689-
01)AggregateExec: mode=Final, gby=[], aggr=[percentile_cont(aggregate_test_100.c2,Float64(1))]
3689+
01)AggregateExec: mode=Final, gby=[], aggr=[max(aggregate_test_100.c2) as percentile_cont(aggregate_test_100.c2,Float64(1))]
36903690
02)--CoalescePartitionsExec
3691-
03)----AggregateExec: mode=Partial, gby=[], aggr=[percentile_cont(aggregate_test_100.c2,Float64(1))]
3691+
03)----AggregateExec: mode=Partial, gby=[], aggr=[max(aggregate_test_100.c2) as percentile_cont(aggregate_test_100.c2,Float64(1))]
36923692
04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
36933693
05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[c2], file_type=csv, has_header=true
36943694

datafusion/sqllogictest/test_files/explain.slt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -660,5 +660,38 @@ logical_plan
660660

661661
# unknown mode
662662

663+
# Test that aliased aggregate expressions are visible in physical explain output
664+
# Issue: https://github.com/apache/datafusion/issues/19685
665+
statement ok
666+
create table agg_alias_test (column1 int, column2 int) as values (1, 100), (2, 200), (3, 300);
667+
668+
query TT
669+
EXPLAIN SELECT sum(column1) AS my_sum FROM agg_alias_test;
670+
----
671+
logical_plan
672+
01)Projection: sum(agg_alias_test.column1) AS my_sum
673+
02)--Aggregate: groupBy=[[]], aggr=[[sum(CAST(agg_alias_test.column1 AS Int64))]]
674+
03)----TableScan: agg_alias_test projection=[column1]
675+
physical_plan
676+
01)ProjectionExec: expr=[sum(agg_alias_test.column1)@0 as my_sum]
677+
02)--AggregateExec: mode=Single, gby=[], aggr=[sum(agg_alias_test.column1)]
678+
03)----DataSourceExec: partitions=1, partition_sizes=[1]
679+
680+
# Test with filter clause on aggregate
681+
query TT
682+
EXPLAIN SELECT sum(column1) FILTER (WHERE column2 <= 200) AS filtered_sum FROM agg_alias_test;
683+
----
684+
logical_plan
685+
01)Projection: sum(agg_alias_test.column1) FILTER (WHERE agg_alias_test.column2 <= Int64(200)) AS filtered_sum
686+
02)--Aggregate: groupBy=[[]], aggr=[[sum(CAST(agg_alias_test.column1 AS Int64)) FILTER (WHERE agg_alias_test.column2 <= Int32(200)) AS sum(agg_alias_test.column1) FILTER (WHERE agg_alias_test.column2 <= Int64(200))]]
687+
03)----TableScan: agg_alias_test projection=[column1, column2]
688+
physical_plan
689+
01)ProjectionExec: expr=[sum(agg_alias_test.column1) FILTER (WHERE agg_alias_test.column2 <= Int64(200))@0 as filtered_sum]
690+
02)--AggregateExec: mode=Single, gby=[], aggr=[sum(agg_alias_test.column1) FILTER (WHERE agg_alias_test.column2 <= Int32(200)) as sum(agg_alias_test.column1) FILTER (WHERE agg_alias_test.column2 <= Int64(200))]
691+
03)----DataSourceExec: partitions=1, partition_sizes=[1]
692+
693+
statement ok
694+
drop table agg_alias_test;
695+
663696
statement ok
664697
drop table foo;

0 commit comments

Comments
 (0)