Skip to content

Commit 1d7ae50

Browse files
committed
allow group by pk, need to fix typing issue
1 parent 7528777 commit 1d7ae50

File tree

6 files changed

+82
-94
lines changed

6 files changed

+82
-94
lines changed

enginetest/queries/logic_test_scripts.go

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,39 +1004,40 @@ var SQLLogicSubqueryTests = []ScriptTest{
10041004
},
10051005
},
10061006
},
1007-
//{
1008-
// Name: "multiple nested subquery",
1009-
// SetUpScript: []string{
1010-
// "CREATE TABLE `groups`(id SERIAL PRIMARY KEY, data JSON);",
1011-
// "INSERT INTO `groups`(data) VALUES('{\"name\": \"Group 1\", \"members\": [{\"name\": \"admin\", \"type\": \"USER\"}, {\"name\": \"user\", \"type\": \"USER\"}]}');",
1012-
// "INSERT INTO `groups`(data) VALUES('{\"name\": \"Group 2\", \"members\": [{\"name\": \"admin2\", \"type\": \"USER\"}]}');",
1013-
// "CREATE TABLE t32786 (id VARCHAR(36) PRIMARY KEY, parent_id VARCHAR(36), parent_path text);",
1014-
// "INSERT INTO t32786 VALUES ('3AAA2577-DBC3-47E7-9E85-9CC7E19CF48A', null, null);",
1015-
// "INSERT INTO t32786 VALUES ('5AE7EAFD-8277-4F41-83DE-0FD4B4482169', '3AAA2577-DBC3-47E7-9E85-9CC7E19CF48A', null);",
1016-
// "CREATE TABLE users (id INT8 NOT NULL, name VARCHAR(50), PRIMARY KEY (id));",
1017-
// "INSERT INTO users(id, name) VALUES (1, 'user1');",
1018-
// "INSERT INTO users(id, name) VALUES (2, 'user2');",
1019-
// "INSERT INTO users(id, name) VALUES (3, 'user3');",
1020-
// "CREATE TABLE stuff(id INT8 NOT NULL, date DATE, user_id INT8, PRIMARY KEY (id), FOREIGN KEY (user_id) REFERENCES users (id));",
1021-
// "INSERT INTO stuff(id, date, user_id) VALUES (1, '2007-10-15', 1);",
1022-
// "INSERT INTO stuff(id, date, user_id) VALUES (2, '2007-12-15', 1);",
1023-
// "INSERT INTO stuff(id, date, user_id) VALUES (3, '2007-11-15', 1);",
1024-
// "INSERT INTO stuff(id, date, user_id) VALUES (4, '2008-01-15', 2);",
1025-
// "INSERT INTO stuff(id, date, user_id) VALUES (5, '2007-06-15', 3);",
1026-
// "INSERT INTO stuff(id, date, user_id) VALUES (6, '2007-03-15', 3);",
1027-
// },
1028-
// Assertions: []ScriptTestAssertion{
1029-
// {
1030-
// Skip: true,
1031-
// Query: "SELECT users.id AS users_id, users.name AS users_name, stuff_1.id AS stuff_1_id, stuff_1.date AS stuff_1_date, stuff_1.user_id AS stuff_1_user_id FROM users LEFT JOIN stuff AS stuff_1 ON users.id = stuff_1.user_id AND stuff_1.id = (SELECT stuff_2.id FROM stuff AS stuff_2 WHERE stuff_2.user_id = users.id ORDER BY stuff_2.date DESC LIMIT 1) ORDER BY users.name;",
1032-
// Expected: []sql.Row{
1033-
// {1, "user1", 2, 2007-12-15, 1},
1034-
// {2, "user2", 4, 2008-01-15, 2},
1035-
// {3, "user3", 5, 2007-06-15, 3},
1036-
// },
1037-
// },
1038-
// },
1039-
//},
1007+
{
1008+
Skip: true,
1009+
Name: "multiple nested subquery",
1010+
SetUpScript: []string{
1011+
"CREATE TABLE `groups`(id SERIAL PRIMARY KEY, data JSON);",
1012+
"INSERT INTO `groups`(data) VALUES('{\"name\": \"Group 1\", \"members\": [{\"name\": \"admin\", \"type\": \"USER\"}, {\"name\": \"user\", \"type\": \"USER\"}]}');",
1013+
"INSERT INTO `groups`(data) VALUES('{\"name\": \"Group 2\", \"members\": [{\"name\": \"admin2\", \"type\": \"USER\"}]}');",
1014+
"CREATE TABLE t32786 (id VARCHAR(36) PRIMARY KEY, parent_id VARCHAR(36), parent_path text);",
1015+
"INSERT INTO t32786 VALUES ('3AAA2577-DBC3-47E7-9E85-9CC7E19CF48A', null, null);",
1016+
"INSERT INTO t32786 VALUES ('5AE7EAFD-8277-4F41-83DE-0FD4B4482169', '3AAA2577-DBC3-47E7-9E85-9CC7E19CF48A', null);",
1017+
"CREATE TABLE users (id INT8 NOT NULL, name VARCHAR(50), PRIMARY KEY (id));",
1018+
"INSERT INTO users(id, name) VALUES (1, 'user1');",
1019+
"INSERT INTO users(id, name) VALUES (2, 'user2');",
1020+
"INSERT INTO users(id, name) VALUES (3, 'user3');",
1021+
"CREATE TABLE stuff(id INT8 NOT NULL, date DATE, user_id INT8, PRIMARY KEY (id), FOREIGN KEY (user_id) REFERENCES users (id));",
1022+
"INSERT INTO stuff(id, date, user_id) VALUES (1, '2007-10-15', 1);",
1023+
"INSERT INTO stuff(id, date, user_id) VALUES (2, '2007-12-15', 1);",
1024+
"INSERT INTO stuff(id, date, user_id) VALUES (3, '2007-11-15', 1);",
1025+
"INSERT INTO stuff(id, date, user_id) VALUES (4, '2008-01-15', 2);",
1026+
"INSERT INTO stuff(id, date, user_id) VALUES (5, '2007-06-15', 3);",
1027+
"INSERT INTO stuff(id, date, user_id) VALUES (6, '2007-03-15', 3);",
1028+
},
1029+
Assertions: []ScriptTestAssertion{
1030+
{
1031+
Skip: true,
1032+
Query: "SELECT users.id AS users_id, users.name AS users_name, stuff_1.id AS stuff_1_id, stuff_1.date AS stuff_1_date, stuff_1.user_id AS stuff_1_user_id FROM users LEFT JOIN stuff AS stuff_1 ON users.id = stuff_1.user_id AND stuff_1.id = (SELECT stuff_2.id FROM stuff AS stuff_2 WHERE stuff_2.user_id = users.id ORDER BY stuff_2.date DESC LIMIT 1) ORDER BY users.name;",
1033+
Expected: []sql.Row{
1034+
{1, "user1", 2, "2007 - 12 - 15", 1},
1035+
{2, "user2", 4, "2008 - 01 - 15", 2},
1036+
{3, "user3", 5, "2007 - 06 - 15", 3},
1037+
},
1038+
},
1039+
},
1040+
},
10401041
{
10411042
Name: "multiple nested subquery again",
10421043
SetUpScript: []string{

enginetest/queries/queries.go

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -10381,6 +10381,27 @@ from typestable`,
1038110381
{2, "second row"},
1038210382
},
1038310383
},
10384+
{
10385+
Query: "select * from two_pk group by pk1, pk2",
10386+
Expected: []sql.Row{
10387+
{0, 0, 0, 1, 2, 3, 4},
10388+
{0, 1, 10, 11, 12, 13, 14},
10389+
{1, 0, 20, 21, 22, 23, 24},
10390+
{1, 1, 30, 31, 32, 33, 34},
10391+
},
10392+
},
10393+
{
10394+
Query: "select pk1+1 from two_pk group by pk1 + 1, mod(pk2, 2)",
10395+
Expected: []sql.Row{
10396+
{1}, {1}, {2}, {2},
10397+
},
10398+
},
10399+
{
10400+
Query: "select mod(pk2, 2) from two_pk group by pk1 + 1, mod(pk2, 2)",
10401+
Expected: []sql.Row{
10402+
{0}, {1}, {0}, {1},
10403+
},
10404+
},
1038410405
}
1038510406

1038610407
var KeylessQueries = []QueryTest{
@@ -11399,6 +11420,15 @@ var ErrorQueries = []QueryErrorTest{
1139911420
Query: "SELECT 1 INTO mytable;",
1140011421
ExpectedErr: sql.ErrUndeclaredVariable,
1140111422
},
11423+
{
11424+
Query: "select * from two_pk group by pk1",
11425+
ExpectedErr: analyzererrors.ErrValidationGroupBy,
11426+
},
11427+
{
11428+
// Grouping over functions and math expressions over PK does not count, and must appear in select
11429+
Query: "select * from two_pk group by pk1 + 1, mod(pk2, 2)",
11430+
ExpectedErr: analyzererrors.ErrValidationGroupBy,
11431+
},
1140211432
}
1140311433

1140411434
var BrokenErrorQueries = []QueryErrorTest{
@@ -11427,39 +11457,11 @@ var BrokenErrorQueries = []QueryErrorTest{
1142711457
Query: "SELECT floor(cor0.col1) * ceil(cor0.col0) AS col2 FROM tab1 AS cor0 GROUP BY cor0.col0",
1142811458
ExpectedErr: analyzererrors.ErrValidationGroupBy,
1142911459
},
11430-
{
11431-
Query: "select * from two_pk group by pk1, pk2",
11432-
// No error
11433-
},
11434-
{
11435-
Query: "select * from two_pk group by pk1",
11436-
ExpectedErr: analyzererrors.ErrValidationGroupBy,
11437-
},
11438-
{
11439-
// Grouping over functions and math expressions over PK does not count, and must appear in select
11440-
Query: "select * from two_pk group by pk1 + 1, mod(pk2, 2)",
11441-
ExpectedErr: analyzererrors.ErrValidationGroupBy,
11442-
},
11443-
{
11444-
// Grouping over functions and math expressions over PK does not count, and must appear in select
11445-
Query: "select pk1+1 from two_pk group by pk1 + 1, mod(pk2, 2)",
11446-
// No error
11447-
},
11448-
{
11449-
// Grouping over functions and math expressions over PK does not count, and must appear in select
11450-
Query: "select mod(pk2, 2) from two_pk group by pk1 + 1, mod(pk2, 2)",
11451-
// No error
11452-
},
11453-
{
11454-
// Grouping over functions and math expressions over PK does not count, and must appear in select
11455-
Query: "select mod(pk2, 2) from two_pk group by pk1 + 1, mod(pk2, 2)",
11456-
// No error
11457-
},
1145811460
{
1145911461
Query: `SELECT any_value(pk), (SELECT max(pk) FROM one_pk WHERE pk < opk.pk) AS x
1146011462
FROM one_pk opk WHERE (SELECT max(pk) FROM one_pk WHERE pk < opk.pk) > 0
1146111463
GROUP BY (SELECT max(pk) FROM one_pk WHERE pk < opk.pk) ORDER BY x`,
11462-
// No error, but we get opk.pk does not exist
11464+
// No error, but we get opk.pk does not exist (aliasing error)
1146311465
},
1146411466
// Unimplemented JSON functions
1146511467
{

sql/analyzer/validation_rules.go

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,8 @@ func validateGroupBy(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scop
262262

263263
switch parent.(type) {
264264
case *plan.Having, *plan.Project, *plan.Sort:
265-
// TODO: these shouldn't be skipped; you can group by primary key without problem b/c only one value
266-
// https://dev.mysql.com/doc/refman/8.0/en/group-by-handling.html#:~:text=The%20query%20is%20valid%20if%20name%20is%20a%20primary%20key
265+
// TODO: these shouldn't be skipped but we currently aren't able to validate GroupBys with selected aliased
266+
// expressions and a lot of our tests group by aliases
267267
return true
268268
}
269269

@@ -276,30 +276,27 @@ func validateGroupBy(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scop
276276
primaryKeys := make(map[string]bool)
277277
for _, col := range gb.Child.Schema() {
278278
if col.PrimaryKey {
279-
primaryKeys[strings.ToLower(col.Name)] = true
279+
primaryKeys[strings.ToLower(col.String())] = true
280280
}
281281
}
282282

283283
groupBys := make(map[string]bool)
284-
groupByAliases := make(map[string]bool)
285284
groupByPrimaryKeys := 0
286285
for _, expr := range gb.GroupByExprs {
287286
exprStr := strings.ToLower(expr.String())
288287
groupBys[exprStr] = true
289288
if primaryKeys[exprStr] {
290289
groupByPrimaryKeys++
291290
}
292-
if _, ok := expr.(sql.Aggregation); ok {
293-
groupByAliases[exprStr] = true
294-
}
295291
}
296292

293+
// TODO: also allow grouping by unique non-nullable columns
297294
if len(primaryKeys) != 0 && groupByPrimaryKeys == len(primaryKeys) {
298295
return true
299296
}
300297

301298
for _, expr := range gb.SelectedExprs {
302-
if !expressionReferencesOnlyGroupBys(groupBys, groupByAliases, expr) {
299+
if !expressionReferencesOnlyGroupBys(groupBys, expr) {
303300
err = analyzererrors.ErrValidationGroupBy.New(expr.String())
304301
return false
305302
}
@@ -310,7 +307,7 @@ func validateGroupBy(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scop
310307
return n, transform.SameTree, err
311308
}
312309

313-
func expressionReferencesOnlyGroupBys(groupBys, groupByAliases map[string]bool, expr sql.Expression) bool {
310+
func expressionReferencesOnlyGroupBys(groupBys map[string]bool, expr sql.Expression) bool {
314311
valid := true
315312
sql.Inspect(expr, func(expr sql.Expression) bool {
316313
exprStr := strings.ToLower(expr.String())

sql/column.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ func (c Column) Copy() *Column {
130130
return &c
131131
}
132132

133+
func (c *Column) String() string {
134+
return c.Source + "." + c.Name
135+
}
136+
133137
// TableId is the unique identifier of a table or table alias in a multi-db environment.
134138
// The long-term goal is to migrate all uses of table name strings to this and minimize places where we
135139
// construct/inspect TableIDs. By treating this as an opaque identifier, it will be easier to migrate to

sql/plan/group_by.go

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,17 +30,8 @@ var ErrGroupBy = errors.NewKind("group by aggregation '%v' not supported")
3030
// GroupBy groups the rows by some expressions.
3131
type GroupBy struct {
3232
UnaryNode
33-
// SelectedExprs are projection dependencies. They are not the explicit select expressions. For example, given the
34-
// query "SELECT pk div 2 from one_pk group by 1," SelectedExprs would contain a GetField for one_pk.pk even though
35-
// the explicit select expression is "pk div 2".
3633
SelectedExprs []sql.Expression
3734
GroupByExprs []sql.Expression
38-
// SelectedAliasMap maps a projection dependency to an alias if it was part of one. For example, given the query
39-
// "SELECT pk div 2, pk + 3 from one_pk group by 2", SelectedAliasMap would contain a mapping from the GetField for
40-
// one_pk.pk to the Alias for "pk div 2" and to the Alias for "pk + 3". This allows for projection dependencies to
41-
// be validated for GroupBy dependencies. Since SelectedAliasMap is only used for validating GroupBys dependencies,
42-
// the expressions are stored as strings. A nested map is used for faster access during validation.
43-
SelectedAliasMap map[string][]string
4435
}
4536

4637
var _ sql.Expressioner = (*GroupBy)(nil)
@@ -52,12 +43,11 @@ var _ sql.CollationCoercible = (*GroupBy)(nil)
5243
// will appear in the output of the query. Some of these fields may be aggregate functions, some may be columns or
5344
// other expressions. Unlike a project, the GroupBy also has a list of group-by expressions, which usually also appear
5445
// in the list of selected expressions.
55-
func NewGroupBy(selectedExprs, groupByExprs []sql.Expression, selectedAliasMap map[string][]string, child sql.Node) *GroupBy {
46+
func NewGroupBy(selectedExprs, groupByExprs []sql.Expression, child sql.Node) *GroupBy {
5647
return &GroupBy{
57-
UnaryNode: UnaryNode{Child: child},
58-
SelectedExprs: selectedExprs,
59-
GroupByExprs: groupByExprs,
60-
SelectedAliasMap: selectedAliasMap,
48+
UnaryNode: UnaryNode{Child: child},
49+
SelectedExprs: selectedExprs,
50+
GroupByExprs: groupByExprs,
6151
}
6252
}
6353

@@ -111,7 +101,7 @@ func (g *GroupBy) WithChildren(children ...sql.Node) (sql.Node, error) {
111101
return nil, sql.ErrInvalidChildrenNumber.New(g, len(children), 1)
112102
}
113103

114-
return NewGroupBy(g.SelectedExprs, g.GroupByExprs, g.SelectedAliasMap, children[0]), nil
104+
return NewGroupBy(g.SelectedExprs, g.GroupByExprs, children[0]), nil
115105
}
116106

117107
// CollationCoercibility implements the interface sql.CollationCoercible.
@@ -132,7 +122,7 @@ func (g *GroupBy) WithExpressions(exprs ...sql.Expression) (sql.Node, error) {
132122
grouping := make([]sql.Expression, len(g.GroupByExprs))
133123
copy(grouping, exprs[len(g.SelectedExprs):])
134124

135-
return NewGroupBy(agg, grouping, g.SelectedAliasMap, g.Child), nil
125+
return NewGroupBy(agg, grouping, g.Child), nil
136126
}
137127

138128
func (g *GroupBy) String() string {

sql/planbuilder/aggregates.go

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,6 @@ func (b *Builder) buildAggregation(fromScope, projScope *scope, groupingCols []s
203203
var selectExprs []sql.Expression
204204
var selectGfs []sql.Expression
205205
selectStr := make(map[string]bool)
206-
aliasMap := make(map[string][]string)
207206
for _, e := range group.aggregations() {
208207
if !selectStr[strings.ToLower(e.String())] {
209208
selectExprs = append(selectExprs, e.scalar)
@@ -213,14 +212,12 @@ func (b *Builder) buildAggregation(fromScope, projScope *scope, groupingCols []s
213212
}
214213
var aliases []sql.Expression
215214
for _, col := range projScope.cols {
216-
var isAlias bool
217215
// eval aliases in project scope
218216
switch e := col.scalar.(type) {
219217
case *expression.Alias:
220218
if !e.Unreferencable() {
221219
aliases = append(aliases, e.WithId(sql.ColumnId(col.id)).(*expression.Alias))
222220
}
223-
isAlias = true
224221
default:
225222
}
226223

@@ -234,9 +231,6 @@ func (b *Builder) buildAggregation(fromScope, projScope *scope, groupingCols []s
234231
selectGfs = append(selectGfs, e)
235232
selectStr[colName] = true
236233
}
237-
if isAlias {
238-
aliasMap[colName] = append(aliasMap[colName], strings.ToLower(col.scalar.String()))
239-
}
240234
default:
241235
}
242236
return false
@@ -251,7 +245,7 @@ func (b *Builder) buildAggregation(fromScope, projScope *scope, groupingCols []s
251245
selectStr[e.String()] = true
252246
}
253247
}
254-
gb := plan.NewGroupBy(selectExprs, groupingCols, aliasMap, fromScope.node)
248+
gb := plan.NewGroupBy(selectExprs, groupingCols, fromScope.node)
255249
outScope.node = gb
256250

257251
if len(aliases) > 0 {

0 commit comments

Comments
 (0)