From f9fa8405bf428ed20345c28fe2ce2329a8fd3aa4 Mon Sep 17 00:00:00 2001 From: Angela Xie Date: Wed, 15 Oct 2025 14:40:55 -0700 Subject: [PATCH 1/4] add aggregate dependency column back to analyzeHaving, skip having child projection, add select aliases to select dependencies so they can be resolved by order by exprs --- enginetest/queries/tpcc_plans.go | 6 +++--- enginetest/queries/tpch_plans.go | 26 +++++++++++++------------- sql/analyzer/validation_rules.go | 11 +++++++++-- sql/planbuilder/aggregates.go | 1 - 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/enginetest/queries/tpcc_plans.go b/enginetest/queries/tpcc_plans.go index c3c5ecac2f..1210de0655 100644 --- a/enginetest/queries/tpcc_plans.go +++ b/enginetest/queries/tpcc_plans.go @@ -846,7 +846,7 @@ from " │ │ ├─ countdistinct([orders2.o_id]):0!null\n" + " │ │ └─ 1 (bigint)\n" + " │ └─ GroupBy\n" + - " │ ├─ select: COUNTDISTINCT([orders2.o_id]), orders2.o_c_id:3, orders2.o_w_id:2!null, orders2.o_d_id:1!null\n" + + " │ ├─ select: COUNTDISTINCT([orders2.o_id]), orders2.o_c_id:3, orders2.o_w_id:2!null, orders2.o_d_id:1!null, orders2.o_id:0!null\n" + " │ ├─ group: orders2.o_c_id:3, orders2.o_d_id:1!null, orders2.o_w_id:2!null\n" + " │ └─ IndexedTableAccess(orders2)\n" + " │ ├─ index: [orders2.o_w_id,orders2.o_d_id,orders2.o_id]\n" + @@ -880,7 +880,7 @@ from " │ ├─ columns: [orders2.o_c_id, orders2.o_w_id, orders2.o_d_id, countdistinct([orders2.o_id]) as count(distinct o_id)]\n" + " │ └─ Having((countdistinct([orders2.o_id]) > 1))\n" + " │ └─ GroupBy\n" + - " │ ├─ SelectDeps(COUNTDISTINCT([orders2.o_id]), orders2.o_c_id, orders2.o_w_id, orders2.o_d_id)\n" + + " │ ├─ SelectDeps(COUNTDISTINCT([orders2.o_id]), orders2.o_c_id, orders2.o_w_id, orders2.o_d_id, orders2.o_id)\n" + " │ ├─ Grouping(orders2.o_c_id, orders2.o_d_id, orders2.o_w_id)\n" + " │ └─ IndexedTableAccess(orders2)\n" + " │ ├─ index: [orders2.o_w_id,orders2.o_d_id,orders2.o_id]\n" + @@ -905,7 +905,7 @@ from " │ ├─ columns: [orders2.o_c_id, orders2.o_w_id, orders2.o_d_id, countdistinct([orders2.o_id]) as count(distinct o_id)]\n" + " │ └─ Having((countdistinct([orders2.o_id]) > 1))\n" + " │ └─ GroupBy\n" + - " │ ├─ SelectDeps(COUNTDISTINCT([orders2.o_id]), orders2.o_c_id, orders2.o_w_id, orders2.o_d_id)\n" + + " │ ├─ SelectDeps(COUNTDISTINCT([orders2.o_id]), orders2.o_c_id, orders2.o_w_id, orders2.o_d_id, orders2.o_id)\n" + " │ ├─ Grouping(orders2.o_c_id, orders2.o_d_id, orders2.o_w_id)\n" + " │ └─ IndexedTableAccess(orders2)\n" + " │ ├─ index: [orders2.o_w_id,orders2.o_d_id,orders2.o_id]\n" + diff --git a/enginetest/queries/tpch_plans.go b/enginetest/queries/tpch_plans.go index 8de19f804e..8f2d8e9dcb 100644 --- a/enginetest/queries/tpch_plans.go +++ b/enginetest/queries/tpch_plans.go @@ -1734,7 +1734,7 @@ order by value desc;`, ExpectedPlan: "Project\n" + " ├─ columns: [partsupp.ps_partkey:1!null, sum((partsupp.ps_supplycost * partsupp.ps_availqty)):0!null->value:0]\n" + - " └─ Sort(value:2!null DESC nullsFirst)\n" + + " └─ Sort(value:4!null DESC nullsFirst)\n" + " └─ Having\n" + " ├─ GreaterThan\n" + " │ ├─ sum((partsupp.ps_supplycost * partsupp.ps_availqty)):0!null\n" + @@ -1752,7 +1752,7 @@ order by " │ │ │ └─ tableId: 4\n" + " │ │ └─ IndexedTableAccess(supplier)\n" + " │ │ ├─ index: [supplier.S_SUPPKEY]\n" + - " │ │ ├─ keys: [partsupp.ps_suppkey:3!null]\n" + + " │ │ ├─ keys: [partsupp.ps_suppkey:5!null]\n" + " │ │ ├─ colSet: (24-30)\n" + " │ │ ├─ tableId: 5\n" + " │ │ └─ Table\n" + @@ -1760,20 +1760,20 @@ order by " │ │ └─ columns: [s_suppkey s_nationkey]\n" + " │ └─ Filter\n" + " │ ├─ Eq\n" + - " │ │ ├─ nation.n_name:4!null\n" + + " │ │ ├─ nation.n_name:6!null\n" + " │ │ └─ GERMANY (longtext)\n" + " │ └─ IndexedTableAccess(nation)\n" + " │ ├─ index: [nation.N_NATIONKEY]\n" + - " │ ├─ keys: [supplier.s_nationkey:5!null]\n" + + " │ ├─ keys: [supplier.s_nationkey:7!null]\n" + " │ ├─ colSet: (31-34)\n" + " │ ├─ tableId: 6\n" + " │ └─ Table\n" + " │ ├─ name: nation\n" + " │ └─ columns: [n_nationkey n_name]\n" + " └─ Project\n" + - " ├─ columns: [sum((partsupp.ps_supplycost * partsupp.ps_availqty)):0!null, partsupp.ps_partkey:1!null, sum((partsupp.ps_supplycost * partsupp.ps_availqty)):0!null->value:0]\n" + + " ├─ columns: [sum((partsupp.ps_supplycost * partsupp.ps_availqty)):0!null, partsupp.ps_partkey:1!null, partsupp.PS_SUPPLYCOST:2!null, partsupp.PS_AVAILQTY:3!null, sum((partsupp.ps_supplycost * partsupp.ps_availqty)):0!null->value:0]\n" + " └─ GroupBy\n" + - " ├─ select: SUM((partsupp.ps_supplycost:3!null * partsupp.ps_availqty:2!null)), partsupp.ps_partkey:0!null\n" + + " ├─ select: SUM((partsupp.ps_supplycost:3!null * partsupp.ps_availqty:2!null)), partsupp.ps_partkey:0!null, partsupp.PS_SUPPLYCOST:3!null, partsupp.PS_AVAILQTY:2!null\n" + " ├─ group: partsupp.ps_partkey:0!null\n" + " └─ LookupJoin\n" + " ├─ LookupJoin\n" + @@ -1807,9 +1807,9 @@ order by " └─ Sort(value DESC)\n" + " └─ Having((sum((partsupp.ps_supplycost * partsupp.ps_availqty)) > Subquery(select sum(ps_supplycost * ps_availqty) * 0.0001000000 from partsupp, supplier, nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'GERMANY')))\n" + " └─ Project\n" + - " ├─ columns: [sum((partsupp.ps_supplycost * partsupp.ps_availqty)), partsupp.ps_partkey, sum((partsupp.ps_supplycost * partsupp.ps_availqty)) as value]\n" + + " ├─ columns: [sum((partsupp.ps_supplycost * partsupp.ps_availqty)), partsupp.ps_partkey, partsupp.PS_SUPPLYCOST, partsupp.PS_AVAILQTY, sum((partsupp.ps_supplycost * partsupp.ps_availqty)) as value]\n" + " └─ GroupBy\n" + - " ├─ SelectDeps(SUM((partsupp.ps_supplycost * partsupp.ps_availqty)), partsupp.ps_partkey)\n" + + " ├─ SelectDeps(SUM((partsupp.ps_supplycost * partsupp.ps_availqty)), partsupp.ps_partkey, partsupp.PS_SUPPLYCOST, partsupp.PS_AVAILQTY)\n" + " ├─ Grouping(partsupp.ps_partkey)\n" + " └─ LookupJoin\n" + " ├─ LookupJoin\n" + @@ -1829,9 +1829,9 @@ order by " └─ Sort(value DESC)\n" + " └─ Having((sum((partsupp.ps_supplycost * partsupp.ps_availqty)) > Subquery(select sum(ps_supplycost * ps_availqty) * 0.0001000000 from partsupp, supplier, nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'GERMANY')))\n" + " └─ Project\n" + - " ├─ columns: [sum((partsupp.ps_supplycost * partsupp.ps_availqty)), partsupp.ps_partkey, sum((partsupp.ps_supplycost * partsupp.ps_availqty)) as value]\n" + + " ├─ columns: [sum((partsupp.ps_supplycost * partsupp.ps_availqty)), partsupp.ps_partkey, partsupp.PS_SUPPLYCOST, partsupp.PS_AVAILQTY, sum((partsupp.ps_supplycost * partsupp.ps_availqty)) as value]\n" + " └─ GroupBy\n" + - " ├─ SelectDeps(SUM((partsupp.ps_supplycost * partsupp.ps_availqty)), partsupp.ps_partkey)\n" + + " ├─ SelectDeps(SUM((partsupp.ps_supplycost * partsupp.ps_availqty)), partsupp.ps_partkey, partsupp.PS_SUPPLYCOST, partsupp.PS_AVAILQTY)\n" + " ├─ Grouping(partsupp.ps_partkey)\n" + " └─ LookupJoin\n" + " ├─ LookupJoin\n" + @@ -2655,7 +2655,7 @@ order by " │ ├─ sum(lineitem.l_quantity):0!null\n" + " │ └─ 300 (smallint)\n" + " └─ GroupBy\n" + - " ├─ select: SUM(lineitem_1.l_quantity:4!null), lineitem_1.l_orderkey:0!null\n" + + " ├─ select: SUM(lineitem_1.l_quantity:4!null), lineitem_1.l_orderkey:0!null, lineitem_1.L_QUANTITY:4!null\n" + " ├─ group: lineitem_1.l_orderkey:0!null\n" + " └─ TableAlias(lineitem_1)\n" + " └─ Table\n" + @@ -2686,7 +2686,7 @@ order by " ├─ columns: [lineitem_1.l_orderkey]\n" + " └─ Having((sum(lineitem.l_quantity) > 300))\n" + " └─ GroupBy\n" + - " ├─ SelectDeps(SUM(lineitem_1.l_quantity), lineitem_1.l_orderkey)\n" + + " ├─ SelectDeps(SUM(lineitem_1.l_quantity), lineitem_1.l_orderkey, lineitem_1.L_QUANTITY)\n" + " ├─ Grouping(lineitem_1.l_orderkey)\n" + " └─ TableAlias(lineitem_1)\n" + " └─ Table\n" + @@ -2714,7 +2714,7 @@ order by " ├─ columns: [lineitem_1.l_orderkey]\n" + " └─ Having((sum(lineitem.l_quantity) > 300))\n" + " └─ GroupBy\n" + - " ├─ SelectDeps(SUM(lineitem_1.l_quantity), lineitem_1.l_orderkey)\n" + + " ├─ SelectDeps(SUM(lineitem_1.l_quantity), lineitem_1.l_orderkey, lineitem_1.L_QUANTITY)\n" + " ├─ Grouping(lineitem_1.l_orderkey)\n" + " └─ TableAlias(lineitem_1)\n" + " └─ Table\n" + diff --git a/sql/analyzer/validation_rules.go b/sql/analyzer/validation_rules.go index 334fafc4fc..bfa2ac314e 100644 --- a/sql/analyzer/validation_rules.go +++ b/sql/analyzer/validation_rules.go @@ -251,9 +251,13 @@ func validateGroupBy(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scop } var err error + var parent sql.Node var project *plan.Project var orderBy *plan.Sort transform.Inspect(n, func(n sql.Node) bool { + defer func() { + parent = n + }() switch n := n.(type) { case *plan.GroupBy: var noGroupBy bool @@ -334,8 +338,10 @@ func validateGroupBy(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scop } } case *plan.Project: - project = n - orderBy = nil + if _, isHaving := parent.(*plan.Having); !isHaving { + project = n + orderBy = nil + } case *plan.Sort: orderBy = n } @@ -406,6 +412,7 @@ func resolveExpr(expr sql.Expression, selectDeps map[string]sql.Expression, grou switch expr := expr.(type) { case *expression.Alias: if dep, ok := selectDeps[strings.ToLower(expr.Child.String())]; ok { + selectDeps[strings.ToLower(expr.Name())] = dep return dep, transform.NewTree, nil } case *expression.GetField: diff --git a/sql/planbuilder/aggregates.go b/sql/planbuilder/aggregates.go index 2e33ee432b..6d98d7f543 100644 --- a/sql/planbuilder/aggregates.go +++ b/sql/planbuilder/aggregates.go @@ -885,7 +885,6 @@ func (b *Builder) analyzeHaving(fromScope, projScope *scope, having *ast.Where) // record aggregate // TODO: this should get projScope as well _ = b.buildAggregateFunc(fromScope, name, n) - return false, nil } else if isWindowFunc(name) { _ = b.buildWindowFunc(fromScope, name, n, (*ast.WindowDef)(n.Over)) } From 777fc9c7d81a930cdfa6794bcdc492113f5d198b Mon Sep 17 00:00:00 2001 From: Angela Xie Date: Wed, 15 Oct 2025 15:02:01 -0700 Subject: [PATCH 2/4] add test --- enginetest/queries/queries.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/enginetest/queries/queries.go b/enginetest/queries/queries.go index 186d7ad058..a491e0ecbd 100644 --- a/enginetest/queries/queries.go +++ b/enginetest/queries/queries.go @@ -9181,6 +9181,11 @@ from typestable`, Query: "select pk, (select max(pk) from one_pk where pk < opk.pk) as x from one_pk opk", Expected: []sql.Row{{0, nil}, {1, 0}, {2, 1}, {3, 2}}, }, + { + // https://github.com/dolthub/dolt/issues/9963 + Query: "select max(i) as max_i from mytable having max(i) < 3", + Expected: []sql.Row{}, + }, } var KeylessQueries = []QueryTest{ From e64408d37e9b9b56845dbf2813330bd26ffcd3f0 Mon Sep 17 00:00:00 2001 From: Angela Xie Date: Wed, 15 Oct 2025 15:33:46 -0700 Subject: [PATCH 3/4] update planbuilder tests --- sql/planbuilder/parse_test.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/planbuilder/parse_test.go b/sql/planbuilder/parse_test.go index ec4899cc96..4159f5aeb0 100644 --- a/sql/planbuilder/parse_test.go +++ b/sql/planbuilder/parse_test.go @@ -230,9 +230,9 @@ Project │ ├─ avg(cte.x):5 │ └─ 0 (tinyint) └─ Project - ├─ columns: [avg(cte.x):5, 1 (tinyint)->x:4] + ├─ columns: [avg(cte.x):5, cte.x:3!null, 1 (tinyint)->x:4] └─ GroupBy - ├─ select: AVG(cte.x:3!null) + ├─ select: AVG(cte.x:3!null), cte.x:3!null ├─ group: └─ SubqueryAlias ├─ name: cte @@ -260,9 +260,9 @@ Project │ ├─ avg(xy.x):5 │ └─ 0 (tinyint) └─ Project - ├─ columns: [avg(xy.x):5, 1 (tinyint)->x:4] + ├─ columns: [avg(xy.x):5, xy.x:1!null, 1 (tinyint)->x:4] └─ GroupBy - ├─ select: AVG(xy.x:1!null) + ├─ select: AVG(xy.x:1!null), xy.x:1!null ├─ group: └─ Table ├─ name: xy @@ -1029,7 +1029,7 @@ Project │ ├─ sum((xy.y * xy.z)):4!null │ └─ 1 (tinyint) └─ GroupBy - ├─ select: SUM((xy.y:2!null * xy.z:3!null)), xy.x:1!null + ├─ select: SUM((xy.y:2!null * xy.z:3!null)), xy.x:1!null, xy.y:2!null, xy.z:3!null ├─ group: xy.x:1!null └─ Table ├─ name: xy @@ -1691,9 +1691,9 @@ Project │ │ ├─ count(uv.u):7!null │ │ └─ 1 (bigint) │ └─ Project - │ ├─ columns: [count(uv.u):7!null, count(uv.u):7!null->count_1:8] + │ ├─ columns: [count(uv.u):7!null, uv.u:4!null, count(uv.u):7!null->count_1:8] │ └─ GroupBy - │ ├─ select: COUNT(uv.u:4!null) + │ ├─ select: COUNT(uv.u:4!null), uv.u:4!null │ ├─ group: uv.u:4!null │ └─ Filter │ ├─ Eq @@ -2156,9 +2156,9 @@ Project ├─ NOT │ └─ avg(-xy.y):5 IS NULL └─ Project - ├─ columns: [avg(-xy.y):5, xy.x:1!null, xy.x:1!null->y:4] + ├─ columns: [avg(-xy.y):5, xy.x:1!null, xy.y:2!null, xy.x:1!null->y:4] └─ GroupBy - ├─ select: AVG(-xy.y), xy.x:1!null + ├─ select: AVG(-xy.y), xy.x:1!null, xy.y:2!null ├─ group: xy.x:1!null └─ Table ├─ name: xy From 17278ed82987bccbfba58ced2dcacaabc399093d Mon Sep 17 00:00:00 2001 From: Angela Xie Date: Wed, 15 Oct 2025 16:39:17 -0700 Subject: [PATCH 4/4] added explanation comment --- sql/analyzer/validation_rules.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/analyzer/validation_rules.go b/sql/analyzer/validation_rules.go index bfa2ac314e..b86074494f 100644 --- a/sql/analyzer/validation_rules.go +++ b/sql/analyzer/validation_rules.go @@ -338,6 +338,8 @@ func validateGroupBy(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scop } } case *plan.Project: + // Project nodes that are direct children of Having nodes include aliases for columns that are part of an + // aggregate function that aren't necessarily selected expressions and therefore shouldn't be validated if _, isHaving := parent.(*plan.Having); !isHaving { project = n orderBy = nil