Fix: Show aliased aggregate expressions in physical EXPLAIN output (#19685)

GaneshPatil7517 · GaneshPatil7517 · commit 4ef18bde38db · 2026-01-09T21:51:19.000+05:30
When an aggregate expression has been aliased, the logical plan EXPLAIN
shows both the alias and the original expression. The physical plan
EXPLAIN, however, showed only the alias, making plans hard to interpret.

This fix updates the physical EXPLAIN output to show both the underlying
aggregate expression and its alias in the format:

  AggregateExec: mode=Single, gby=[], aggr=[sum(t.column1) as my_alias]

instead of:

  AggregateExec: mode=Single, gby=[], aggr=[my_alias]

Changes:
- Modified create_aggregate_expr_and_maybe_filter() in physical_planner.rs
  to use the unaliased expression for human_display, so it captures the
  actual aggregate expression instead of just the alias name.
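- Modified DisplayAs impl for AggregateExec to print "<expression> as <alias>"
  when the expression's display string is non-empty and differs from the
  alias; otherwise only the alias is printed, as before.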
- Added regression tests to explain.slt and updated test expectations in
  aggregate.slt and agg_func_substitute.slt to reflect the new output format.
diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs
@@ -2189,7 +2189,11 @@ pub fn create_aggregate_expr_and_maybe_filter(
     let (name, human_display, e) = match e {
         Expr::Alias(Alias { name, .. }) => {
             let unaliased = e.clone().unalias_nested().data;
-            (Some(name.clone()), e.human_display().to_string(), unaliased)
+            // Use the unaliased expression for human_display so that the
+            // physical plan EXPLAIN shows the actual aggregate expression,
+            // not just the alias name
+            let display = unaliased.human_display().to_string();
+            (Some(name.clone()), display, unaliased)
         }
         Expr::AggregateFunction(_) => (
             Some(e.schema_name().to_string()),
diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs
@@ -1116,7 +1116,16 @@ impl DisplayAs for AggregateExec {
                 let a: Vec<String> = self
                     .aggr_expr
                     .iter()
-                    .map(|agg| agg.name().to_string())
+                    .map(|agg| {
+                        let expr_display = agg.human_display();
+                        let alias = agg.name();
+                        // Show the expression with alias if they differ
+                        if !expr_display.is_empty() && expr_display != alias {
+                            format!("{expr_display} as {alias}")
+                        } else {
+                            alias.to_string()
+                        }
+                    })
                     .collect();
                 write!(f, ", aggr=[{}]", a.join(", "))?;
                 if let Some(limit) = self.limit {
diff --git a/datafusion/sqllogictest/test_files/agg_func_substitute.slt b/datafusion/sqllogictest/test_files/agg_func_substitute.slt
@@ -44,10 +44,10 @@ logical_plan
 03)----TableScan: multiple_ordered_table projection=[a, c]
 physical_plan
 01)ProjectionExec: expr=[a@0 as a, nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]@1 as result]
-02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
+02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c, 1) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] as nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
 03)----SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]
 04)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4
-05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
+05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c, 1) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] as nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
 06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, maintains_sort_order=true
 07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_orderings=[[a@0 ASC NULLS LAST], [c@1 ASC NULLS LAST]], file_type=csv, has_header=true
 
@@ -63,10 +63,10 @@ logical_plan
 03)----TableScan: multiple_ordered_table projection=[a, c]
 physical_plan
 01)ProjectionExec: expr=[a@0 as a, nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]@1 as result]
-02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
+02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c, 1) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] as nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
 03)----SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]
 04)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4
-05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
+05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c, 1) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] as nth_value(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
 06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, maintains_sort_order=true
 07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_orderings=[[a@0 ASC NULLS LAST], [c@1 ASC NULLS LAST]], file_type=csv, has_header=true
 
@@ -81,10 +81,10 @@ logical_plan
 03)----TableScan: multiple_ordered_table projection=[a, c]
 physical_plan
 01)ProjectionExec: expr=[a@0 as a, nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]@1 as result]
-02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
+02)--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c, 101) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] as nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
 03)----SortExec: expr=[a@0 ASC NULLS LAST], preserve_partitioning=[true]
 04)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4
-05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
+05)--------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[nth_value(multiple_ordered_table.c, 101) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] as nth_value(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted
 06)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1, maintains_sort_order=true
 07)------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_orderings=[[a@0 ASC NULLS LAST], [c@1 ASC NULLS LAST]], file_type=csv, has_header=true
 
diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -3647,9 +3647,9 @@ logical_plan
 01)Aggregate: groupBy=[[]], aggr=[[min(CAST(aggregate_test_100.c2 AS Float64)) AS percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 ASC NULLS LAST]]]
 02)--TableScan: aggregate_test_100 projection=[c2]
 physical_plan
-01)AggregateExec: mode=Final, gby=[], aggr=[percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 ASC NULLS LAST]]
+01)AggregateExec: mode=Final, gby=[], aggr=[min(aggregate_test_100.c2) as percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 ASC NULLS LAST]]
 02)--CoalescePartitionsExec
-03)----AggregateExec: mode=Partial, gby=[], aggr=[percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 ASC NULLS LAST]]
+03)----AggregateExec: mode=Partial, gby=[], aggr=[min(aggregate_test_100.c2) as percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 ASC NULLS LAST]]
 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[c2], file_type=csv, has_header=true
 
@@ -3660,9 +3660,9 @@ logical_plan
 01)Aggregate: groupBy=[[]], aggr=[[max(CAST(aggregate_test_100.c2 AS Float64)) AS percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 DESC NULLS FIRST]]]
 02)--TableScan: aggregate_test_100 projection=[c2]
 physical_plan
-01)AggregateExec: mode=Final, gby=[], aggr=[percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 DESC NULLS FIRST]]
+01)AggregateExec: mode=Final, gby=[], aggr=[max(aggregate_test_100.c2) as percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 DESC NULLS FIRST]]
 02)--CoalescePartitionsExec
-03)----AggregateExec: mode=Partial, gby=[], aggr=[percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 DESC NULLS FIRST]]
+03)----AggregateExec: mode=Partial, gby=[], aggr=[max(aggregate_test_100.c2) as percentile_cont(Float64(0)) WITHIN GROUP [aggregate_test_100.c2 DESC NULLS FIRST]]
 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[c2], file_type=csv, has_header=true
 
@@ -3673,9 +3673,9 @@ logical_plan
 01)Aggregate: groupBy=[[]], aggr=[[min(CAST(aggregate_test_100.c2 AS Float64)) AS percentile_cont(aggregate_test_100.c2,Float64(0))]]
 02)--TableScan: aggregate_test_100 projection=[c2]
 physical_plan
-01)AggregateExec: mode=Final, gby=[], aggr=[percentile_cont(aggregate_test_100.c2,Float64(0))]
+01)AggregateExec: mode=Final, gby=[], aggr=[min(aggregate_test_100.c2) as percentile_cont(aggregate_test_100.c2,Float64(0))]
 02)--CoalescePartitionsExec
-03)----AggregateExec: mode=Partial, gby=[], aggr=[percentile_cont(aggregate_test_100.c2,Float64(0))]
+03)----AggregateExec: mode=Partial, gby=[], aggr=[min(aggregate_test_100.c2) as percentile_cont(aggregate_test_100.c2,Float64(0))]
 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[c2], file_type=csv, has_header=true
 
@@ -3686,9 +3686,9 @@ logical_plan
 01)Aggregate: groupBy=[[]], aggr=[[max(CAST(aggregate_test_100.c2 AS Float64)) AS percentile_cont(aggregate_test_100.c2,Float64(1))]]
 02)--TableScan: aggregate_test_100 projection=[c2]
 physical_plan
-01)AggregateExec: mode=Final, gby=[], aggr=[percentile_cont(aggregate_test_100.c2,Float64(1))]
+01)AggregateExec: mode=Final, gby=[], aggr=[max(aggregate_test_100.c2) as percentile_cont(aggregate_test_100.c2,Float64(1))]
 02)--CoalescePartitionsExec
-03)----AggregateExec: mode=Partial, gby=[], aggr=[percentile_cont(aggregate_test_100.c2,Float64(1))]
+03)----AggregateExec: mode=Partial, gby=[], aggr=[max(aggregate_test_100.c2) as percentile_cont(aggregate_test_100.c2,Float64(1))]
 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 05)--------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100_with_dates.csv]]}, projection=[c2], file_type=csv, has_header=true
 
diff --git a/datafusion/sqllogictest/test_files/explain.slt b/datafusion/sqllogictest/test_files/explain.slt
@@ -660,5 +660,38 @@ logical_plan
 
 # unknown mode
 
+# Test that aliased aggregate expressions are visible in physical explain output
+# Issue: https://github.com/apache/datafusion/issues/19685
+statement ok
+create table agg_alias_test (column1 int, column2 int) as values (1, 100), (2, 200), (3, 300);
+
+query TT
+EXPLAIN SELECT sum(column1) AS my_sum FROM agg_alias_test;
+----
+logical_plan
+01)Projection: sum(agg_alias_test.column1) AS my_sum
+02)--Aggregate: groupBy=[[]], aggr=[[sum(CAST(agg_alias_test.column1 AS Int64))]]
+03)----TableScan: agg_alias_test projection=[column1]
+physical_plan
+01)ProjectionExec: expr=[sum(agg_alias_test.column1)@0 as my_sum]
+02)--AggregateExec: mode=Single, gby=[], aggr=[sum(agg_alias_test.column1)]
+03)----DataSourceExec: partitions=1, partition_sizes=[1]
+
+# Test with filter clause on aggregate
+query TT
+EXPLAIN SELECT sum(column1) FILTER (WHERE column2 <= 200) AS filtered_sum FROM agg_alias_test;
+----
+logical_plan
+01)Projection: sum(agg_alias_test.column1) FILTER (WHERE agg_alias_test.column2 <= Int64(200)) AS filtered_sum
+02)--Aggregate: groupBy=[[]], aggr=[[sum(CAST(agg_alias_test.column1 AS Int64)) FILTER (WHERE agg_alias_test.column2 <= Int32(200)) AS sum(agg_alias_test.column1) FILTER (WHERE agg_alias_test.column2 <= Int64(200))]]
+03)----TableScan: agg_alias_test projection=[column1, column2]
+physical_plan
+01)ProjectionExec: expr=[sum(agg_alias_test.column1) FILTER (WHERE agg_alias_test.column2 <= Int64(200))@0 as filtered_sum]
+02)--AggregateExec: mode=Single, gby=[], aggr=[sum(agg_alias_test.column1) FILTER (WHERE agg_alias_test.column2 <= Int32(200)) as sum(agg_alias_test.column1) FILTER (WHERE agg_alias_test.column2 <= Int64(200))]
+03)----DataSourceExec: partitions=1, partition_sizes=[1]
+
+statement ok
+drop table agg_alias_test;
+
 statement ok
 drop table foo;