Add more tests

LiaCastaneda · LiaCastaneda · commit 3ebf2598af02 · 2025-11-03T13:38:00.000-05:00
diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs
@@ -1956,73 +1956,55 @@ async fn test_aggregate_filter_pushdown() {
     );
 }
 
-#[tokio::test]
-async fn test_no_pushdown_aggregate_filter_on_non_grouping_column() {
-    // Test that filters on non-grouping columns (like aggregate results) are NOT pushed through
-    // Simulates: SELECT a, COUNT(b) as cnt FROM table GROUP BY a HAVING cnt > 5
-    // The filter on 'cnt' cannot be pushed down because it's an aggregate result, not a grouping column
+#[test]
+fn test_no_pushdown_aggregate_filter_on_non_grouping_column() {
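+    // Test that a filter on a non-first grouping column is still pushed down to the scan
+    // SELECT a, b, count(c) AS cnt FROM table GROUP BY a, b HAVING b = 'bar'
+    // 'b' is the second grouping column, so the filter should push down (NOTE(review): the test name still says "no_pushdown"/"non_grouping_column"; consider renaming)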
+    let scan = TestScanBuilder::new(schema()).with_support(true).build();
 
-    let batches =
+    let aggregate_expr =
         vec![
-            record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(),
+            AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()])
+                .schema(schema())
+                .alias("cnt")
+                .build()
+                .map(Arc::new)
+                .unwrap(),
         ];
 
-    let scan = TestScanBuilder::new(schema())
-        .with_support(true)
-        .with_batches(batches)
-        .build();
-
-    // Create an aggregate: GROUP BY a with COUNT(b)
-    let group_by = PhysicalGroupBy::new_single(vec![(
-        col("a", &schema()).unwrap(),
-        "a".to_string(),
-    )]);
-
-    // Add COUNT aggregate
-    let count_expr =
-        AggregateExprBuilder::new(count_udaf(), vec![col("b", &schema()).unwrap()])
-            .schema(schema())
-            .alias("count")
-            .build()
-            .unwrap();
+    let group_by = PhysicalGroupBy::new_single(vec![
+        (col("a", &schema()).unwrap(), "a".to_string()),
+        (col("b", &schema()).unwrap(), "b".to_string()),
+    ]);
 
     let aggregate = Arc::new(
         AggregateExec::try_new(
-            AggregateMode::Partial,
+            AggregateMode::Final,
             group_by,
-            vec![count_expr.into()],
+            aggregate_expr.clone(),
             vec![None],
-            Arc::clone(&scan),
+            scan,
             schema(),
         )
         .unwrap(),
     );
 
-    // Add a filter on the aggregate output column
-    // This simulates filtering on COUNT result, which should NOT be pushed through
-    let agg_schema = aggregate.schema();
-    let predicate = Arc::new(BinaryExpr::new(
-        Arc::new(Column::new_with_schema("count[count]", &agg_schema).unwrap()),
-        Operator::Gt,
-        Arc::new(Literal::new(ScalarValue::Int64(Some(5)))),
-    ));
-    let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap())
-        as Arc<dyn ExecutionPlan>;
+    let predicate = col_lit_predicate("b", "bar", &schema());
+    let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap());
 
-    // The filter should NOT be pushed through the aggregate since it references a non-grouping column
     insta::assert_snapshot!(
-        OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new(), true),
+        OptimizationTest::new(plan, FilterPushdown::new(), true),
         @r"
     OptimizationTest:
       input:
-        - FilterExec: count[count]@1 > 5
-        -   AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count]
+        - FilterExec: b@1 = bar
+        -   AggregateExec: mode=Final, gby=[a@0 as a, b@1 as b], aggr=[cnt]
         -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
       output:
         Ok:
-          - FilterExec: count[count]@1 > 5
-          -   AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count]
-          -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
+          - AggregateExec: mode=Final, gby=[a@0 as a, b@1 as b], aggr=[cnt], ordering_mode=PartiallySorted([1])
+          -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=b@1 = bar
     "
     );
 }
@@ -2165,3 +2147,118 @@ fn test_pushdown_grouping_sets_filter_on_common_column() {
     "
     );
 }
+
+#[test]
+fn test_pushdown_with_empty_group_by() {
+    // Test that a filter placed above the aggregate is still pushed down when GROUP BY is empty
+    // Input plan: FilterExec(a = 'foo') over an AggregateExec computing count(c) AS cnt with no grouping columns
+    // Expected: the predicate ends up on the scan, as in SELECT count(c) AS cnt FROM table WHERE a = 'foo'
+    let scan = TestScanBuilder::new(schema()).with_support(true).build();
+
+    let aggregate_expr =
+        vec![
+            AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()])
+                .schema(schema())
+                .alias("cnt")
+                .build()
+                .map(Arc::new)
+                .unwrap(),
+        ];
+
+    // Empty GROUP BY - no grouping columns
+    let group_by = PhysicalGroupBy::new_single(vec![]);
+
+    let aggregate = Arc::new(
+        AggregateExec::try_new(
+            AggregateMode::Final,
+            group_by,
+            aggregate_expr.clone(),
+            vec![None],
+            scan,
+            schema(),
+        )
+        .unwrap(),
+    );
+
+    // Filter on 'a'
+    let predicate = col_lit_predicate("a", "foo", &schema());
+    let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap());
+
+    // The filter should be pushed down even with empty GROUP BY
+    insta::assert_snapshot!(
+        OptimizationTest::new(plan, FilterPushdown::new(), true),
+        @r"
+    OptimizationTest:
+      input:
+        - FilterExec: a@0 = foo
+        -   AggregateExec: mode=Final, gby=[], aggr=[cnt]
+        -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
+      output:
+        Ok:
+          - AggregateExec: mode=Final, gby=[], aggr=[cnt]
+          -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=a@0 = foo
+    "
+    );
+}
+
+#[test]
+fn test_pushdown_with_computed_grouping_key() {
+    // Test filter pushdown with a computed grouping expression
+    // SELECT (c + 1.0) AS c_plus_1, count(a) AS cnt FROM table WHERE c > 5.0 GROUP BY (c + 1.0)
+
+    let scan = TestScanBuilder::new(schema()).with_support(true).build();
+
+    let predicate = Arc::new(BinaryExpr::new(
+        col("c", &schema()).unwrap(),
+        Operator::Gt,
+        Arc::new(Literal::new(ScalarValue::Float64(Some(5.0)))),
+    )) as Arc<dyn PhysicalExpr>;
+    let filter = Arc::new(FilterExec::try_new(predicate, scan).unwrap());
+
+    let aggregate_expr =
+        vec![
+            AggregateExprBuilder::new(count_udaf(), vec![col("a", &schema()).unwrap()])
+                .schema(schema())
+                .alias("cnt")
+                .build()
+                .map(Arc::new)
+                .unwrap(),
+        ];
+
+    let c_plus_one = Arc::new(BinaryExpr::new(
+        col("c", &schema()).unwrap(),
+        Operator::Plus,
+        Arc::new(Literal::new(ScalarValue::Float64(Some(1.0)))),
+    )) as Arc<dyn PhysicalExpr>;
+
+    let group_by =
+        PhysicalGroupBy::new_single(vec![(c_plus_one, "c_plus_1".to_string())]);
+
+    let plan = Arc::new(
+        AggregateExec::try_new(
+            AggregateMode::Final,
+            group_by,
+            aggregate_expr.clone(),
+            vec![None],
+            filter,
+            schema(),
+        )
+        .unwrap(),
+    );
+
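+    // The FilterExec already sits below the aggregate, so its predicate is pushed straight into the scan and the computed grouping key (c + 1.0) is left unchanged (NOTE(review): this does not exercise pushdown through the aggregate; confirm that is intended)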
+    insta::assert_snapshot!(
+        OptimizationTest::new(plan, FilterPushdown::new(), true),
+        @r"
+    OptimizationTest:
+      input:
+        - AggregateExec: mode=Final, gby=[c@2 + 1 as c_plus_1], aggr=[cnt]
+        -   FilterExec: c@2 > 5
+        -     DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true
+      output:
+        Ok:
+          - AggregateExec: mode=Final, gby=[c@2 + 1 as c_plus_1], aggr=[cnt]
+          -   DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=c@2 > 5
+    "
+    );
+}
diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs
@@ -1032,6 +1032,8 @@ impl ExecutionPlan for AggregateExec {
         CardinalityEffect::LowerEqual
     }
 
+    /// Push down parent filters when possible (see implementation comment for details),
+    /// but do not introduce any new self filters.
     fn gather_filters_for_pushdown(
         &self,
         _phase: FilterPushdownPhase,

Original file line number	Diff line number	Diff line change
`@@ -1032,6 +1032,8 @@ impl ExecutionPlan for AggregateExec {`
`1032`	`1032`	`CardinalityEffect::LowerEqual`
`1033`	`1033`	`}`
`1034`	`1034`
	`1035`	`+ /// Push down parent filters when possible (see implementation comment for details),`
	`1036`	`+ /// but do not introduce any new self filters.`
`1035`	`1037`	`fn gather_filters_for_pushdown(`
`1036`	`1038`	`&self,`
`1037`	`1039`	`_phase: FilterPushdownPhase,`