Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 35 additions & 33 deletions enginetest/queries/logic_test_scripts.go
Original file line number Diff line number Diff line change
Expand Up @@ -1004,39 +1004,41 @@ var SQLLogicSubqueryTests = []ScriptTest{
},
},
},
//{
// Name: "multiple nested subquery",
// SetUpScript: []string{
// "CREATE TABLE `groups`(id SERIAL PRIMARY KEY, data JSON);",
// "INSERT INTO `groups`(data) VALUES('{\"name\": \"Group 1\", \"members\": [{\"name\": \"admin\", \"type\": \"USER\"}, {\"name\": \"user\", \"type\": \"USER\"}]}');",
// "INSERT INTO `groups`(data) VALUES('{\"name\": \"Group 2\", \"members\": [{\"name\": \"admin2\", \"type\": \"USER\"}]}');",
// "CREATE TABLE t32786 (id VARCHAR(36) PRIMARY KEY, parent_id VARCHAR(36), parent_path text);",
// "INSERT INTO t32786 VALUES ('3AAA2577-DBC3-47E7-9E85-9CC7E19CF48A', null, null);",
// "INSERT INTO t32786 VALUES ('5AE7EAFD-8277-4F41-83DE-0FD4B4482169', '3AAA2577-DBC3-47E7-9E85-9CC7E19CF48A', null);",
// "CREATE TABLE users (id INT8 NOT NULL, name VARCHAR(50), PRIMARY KEY (id));",
// "INSERT INTO users(id, name) VALUES (1, 'user1');",
// "INSERT INTO users(id, name) VALUES (2, 'user2');",
// "INSERT INTO users(id, name) VALUES (3, 'user3');",
// "CREATE TABLE stuff(id INT8 NOT NULL, date DATE, user_id INT8, PRIMARY KEY (id), FOREIGN KEY (user_id) REFERENCES users (id));",
// "INSERT INTO stuff(id, date, user_id) VALUES (1, '2007-10-15', 1);",
// "INSERT INTO stuff(id, date, user_id) VALUES (2, '2007-12-15', 1);",
// "INSERT INTO stuff(id, date, user_id) VALUES (3, '2007-11-15', 1);",
// "INSERT INTO stuff(id, date, user_id) VALUES (4, '2008-01-15', 2);",
// "INSERT INTO stuff(id, date, user_id) VALUES (5, '2007-06-15', 3);",
// "INSERT INTO stuff(id, date, user_id) VALUES (6, '2007-03-15', 3);",
// },
// Assertions: []ScriptTestAssertion{
// {
// Skip: true,
// Query: "SELECT users.id AS users_id, users.name AS users_name, stuff_1.id AS stuff_1_id, stuff_1.date AS stuff_1_date, stuff_1.user_id AS stuff_1_user_id FROM users LEFT JOIN stuff AS stuff_1 ON users.id = stuff_1.user_id AND stuff_1.id = (SELECT stuff_2.id FROM stuff AS stuff_2 WHERE stuff_2.user_id = users.id ORDER BY stuff_2.date DESC LIMIT 1) ORDER BY users.name;",
// Expected: []sql.Row{
// {1, "user1", 2, 2007-12-15, 1},
// {2, "user2", 4, 2008-01-15, 2},
// {3, "user3", 5, 2007-06-15, 3},
// },
// },
// },
//},
{
// Skipping because we don't convert Time objects to strings in enginetests
Skip: true,
Name: "multiple nested subquery",
SetUpScript: []string{
"CREATE TABLE `groups`(id SERIAL PRIMARY KEY, data JSON);",
"INSERT INTO `groups`(data) VALUES('{\"name\": \"Group 1\", \"members\": [{\"name\": \"admin\", \"type\": \"USER\"}, {\"name\": \"user\", \"type\": \"USER\"}]}');",
"INSERT INTO `groups`(data) VALUES('{\"name\": \"Group 2\", \"members\": [{\"name\": \"admin2\", \"type\": \"USER\"}]}');",
"CREATE TABLE t32786 (id VARCHAR(36) PRIMARY KEY, parent_id VARCHAR(36), parent_path text);",
"INSERT INTO t32786 VALUES ('3AAA2577-DBC3-47E7-9E85-9CC7E19CF48A', null, null);",
"INSERT INTO t32786 VALUES ('5AE7EAFD-8277-4F41-83DE-0FD4B4482169', '3AAA2577-DBC3-47E7-9E85-9CC7E19CF48A', null);",
"CREATE TABLE users (id INT8 NOT NULL, name VARCHAR(50), PRIMARY KEY (id));",
"INSERT INTO users(id, name) VALUES (1, 'user1');",
"INSERT INTO users(id, name) VALUES (2, 'user2');",
"INSERT INTO users(id, name) VALUES (3, 'user3');",
"CREATE TABLE stuff(id INT8 NOT NULL, date DATE, user_id INT8, PRIMARY KEY (id), FOREIGN KEY (user_id) REFERENCES users (id));",
"INSERT INTO stuff(id, date, user_id) VALUES (1, '2007-10-15', 1);",
"INSERT INTO stuff(id, date, user_id) VALUES (2, '2007-12-15', 1);",
"INSERT INTO stuff(id, date, user_id) VALUES (3, '2007-11-15', 1);",
"INSERT INTO stuff(id, date, user_id) VALUES (4, '2008-01-15', 2);",
"INSERT INTO stuff(id, date, user_id) VALUES (5, '2007-06-15', 3);",
"INSERT INTO stuff(id, date, user_id) VALUES (6, '2007-03-15', 3);",
},
Assertions: []ScriptTestAssertion{
{
Skip: true,
Query: "SELECT users.id AS users_id, users.name AS users_name, stuff_1.id AS stuff_1_id, stuff_1.date AS stuff_1_date, stuff_1.user_id AS stuff_1_user_id FROM users LEFT JOIN stuff AS stuff_1 ON users.id = stuff_1.user_id AND stuff_1.id = (SELECT stuff_2.id FROM stuff AS stuff_2 WHERE stuff_2.user_id = users.id ORDER BY stuff_2.date DESC LIMIT 1) ORDER BY users.name;",
Expected: []sql.Row{
{1, "user1", 2, "2007 - 12 - 15", 1},
{2, "user2", 4, "2008 - 01 - 15", 2},
{3, "user3", 5, "2007 - 06 - 15", 3},
},
},
},
},
{
Name: "multiple nested subquery again",
SetUpScript: []string{
Expand Down
85 changes: 49 additions & 36 deletions enginetest/queries/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -828,10 +828,6 @@ var QueryTests = []QueryTest{
Query: "select y as x from xy group by (y) having AVG(x) > 0",
Expected: []sql.Row{{0}, {1}, {3}},
},
// {
// Query: "select y as z from xy group by (y) having AVG(z) > 0",
// Expected: []sql.Row{{1}, {2}, {3}},
// },
{
Query: "SELECT * FROM mytable t0 INNER JOIN mytable t1 ON (t1.i IN (((true)%(''))));",
Expected: []sql.Row{},
Expand Down Expand Up @@ -10381,6 +10377,28 @@ from typestable`,
{2, "second row"},
},
},
{
Query: "select * from two_pk group by pk1, pk2",
Expected: []sql.Row{
{0, 0, 0, 1, 2, 3, 4},
{0, 1, 10, 11, 12, 13, 14},
{1, 0, 20, 21, 22, 23, 24},
{1, 1, 30, 31, 32, 33, 34},
},
},
{
Query: "select pk1+1 from two_pk group by pk1 + 1, mod(pk2, 2)",
Expected: []sql.Row{
{1}, {1}, {2}, {2},
},
},
{
Query: "select mod(pk2, 2) from two_pk group by pk1 + 1, mod(pk2, 2)",
Expected: []sql.Row{
// mod is a Decimal type, which we convert to a string in our enginetests
{"0"}, {"1"}, {"0"}, {"1"},
},
},
}

var KeylessQueries = []QueryTest{
Expand Down Expand Up @@ -10632,6 +10650,20 @@ FROM mytable;`,
{"DECIMAL"},
},
},
// https://github.com/dolthub/dolt/issues/7095
// References in group by and having should be allowed to match select aliases
{
Query: "select y as z from xy group by (y) having AVG(z) > 0",
Expected: []sql.Row{{1}, {2}, {3}},
},
{
Query: "select y as z from xy group by (z) having AVG(z) > 0",
Expected: []sql.Row{{1}, {2}, {3}},
},
{
Query: "select y + 1 as z from xy group by (z) having AVG(z) > 1",
Expected: []sql.Row{{2}, {3}, {4}},
},
}

var VersionedQueries = []QueryTest{
Expand Down Expand Up @@ -11399,6 +11431,15 @@ var ErrorQueries = []QueryErrorTest{
Query: "SELECT 1 INTO mytable;",
ExpectedErr: sql.ErrUndeclaredVariable,
},
{
Query: "select * from two_pk group by pk1",
ExpectedErr: analyzererrors.ErrValidationGroupBy,
},
{
// Grouping over functions and math expressions over PK does not count, and must appear in select
Query: "select * from two_pk group by pk1 + 1, mod(pk2, 2)",
ExpectedErr: analyzererrors.ErrValidationGroupBy,
},
}

var BrokenErrorQueries = []QueryErrorTest{
Expand All @@ -11415,10 +11456,10 @@ var BrokenErrorQueries = []QueryErrorTest{
ExpectedErr: sql.ErrTableNotFound,
},

// Our behavior in when sql_mode = ONLY_FULL_GROUP_BY is inconsistent with MySQL
// Our behavior in when sql_mode = ONLY_FULL_GROUP_BY is inconsistent with MySQL. This is because we skip validation
// for GroupBys wrapped in a Project since we are not able to validate selected expressions that get optimized as an
// alias.
// Relevant issue: https://github.com/dolthub/dolt/issues/4998
// Special case: If you are grouping by every field of the PK, then you can select anything
// Otherwise, whatever you are selecting must be in the Group By (with the exception of aggregations)
{
Query: "SELECT col0, floor(col1) FROM tab1 GROUP by col0;",
ExpectedErr: analyzererrors.ErrValidationGroupBy,
Expand All @@ -11427,39 +11468,11 @@ var BrokenErrorQueries = []QueryErrorTest{
Query: "SELECT floor(cor0.col1) * ceil(cor0.col0) AS col2 FROM tab1 AS cor0 GROUP BY cor0.col0",
ExpectedErr: analyzererrors.ErrValidationGroupBy,
},
{
Query: "select * from two_pk group by pk1, pk2",
// No error
},
{
Query: "select * from two_pk group by pk1",
ExpectedErr: analyzererrors.ErrValidationGroupBy,
},
{
// Grouping over functions and math expressions over PK does not count, and must appear in select
Query: "select * from two_pk group by pk1 + 1, mod(pk2, 2)",
ExpectedErr: analyzererrors.ErrValidationGroupBy,
},
{
// Grouping over functions and math expressions over PK does not count, and must appear in select
Query: "select pk1+1 from two_pk group by pk1 + 1, mod(pk2, 2)",
// No error
},
{
// Grouping over functions and math expressions over PK does not count, and must appear in select
Query: "select mod(pk2, 2) from two_pk group by pk1 + 1, mod(pk2, 2)",
// No error
},
{
// Grouping over functions and math expressions over PK does not count, and must appear in select
Query: "select mod(pk2, 2) from two_pk group by pk1 + 1, mod(pk2, 2)",
// No error
},
{
Query: `SELECT any_value(pk), (SELECT max(pk) FROM one_pk WHERE pk < opk.pk) AS x
FROM one_pk opk WHERE (SELECT max(pk) FROM one_pk WHERE pk < opk.pk) > 0
GROUP BY (SELECT max(pk) FROM one_pk WHERE pk < opk.pk) ORDER BY x`,
// No error, but we get opk.pk does not exist
// No error, but we get opk.pk does not exist (aliasing error)
},
// Unimplemented JSON functions
{
Expand Down
48 changes: 29 additions & 19 deletions sql/analyzer/validation_rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,9 @@ func validateGroupBy(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scop

switch parent.(type) {
case *plan.Having, *plan.Project, *plan.Sort:
// TODO: these shouldn't be skipped; you can group by primary key without problem b/c only one value
// https://dev.mysql.com/doc/refman/8.0/en/group-by-handling.html#:~:text=The%20query%20is%20valid%20if%20name%20is%20a%20primary%20key
// TODO: these shouldn't be skipped but we currently aren't able to validate GroupBys with selected aliased
// expressions and a lot of our tests group by aliases
// https://github.com/dolthub/dolt/issues/4998
return true
}

Expand All @@ -273,17 +274,34 @@ func validateGroupBy(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scop
return true
}

var groupBys []string
primaryKeys := make(map[string]bool)
for _, col := range gb.Child.Schema() {
if col.PrimaryKey {
primaryKeys[strings.ToLower(col.String())] = true
}
}

groupBys := make(map[string]bool)
groupByPrimaryKeys := 0
for _, expr := range gb.GroupByExprs {
groupBys = append(groupBys, expr.String())
exprStr := strings.ToLower(expr.String())
groupBys[exprStr] = true
if primaryKeys[exprStr] {
groupByPrimaryKeys++
}
}

// TODO: also allow grouping by unique non-nullable columns
if len(primaryKeys) != 0 && groupByPrimaryKeys == len(primaryKeys) {
return true
}

for _, expr := range gb.SelectedExprs {
if _, ok := expr.(sql.Aggregation); !ok {
if !expressionReferencesOnlyGroupBys(groupBys, expr) {
err = analyzererrors.ErrValidationGroupBy.New(expr.String())
return false
}
if !expressionReferencesOnlyGroupBys(groupBys, expr) {
// TODO: this is currently too restrictive. Dependent columns are fine to reference
// https://dev.mysql.com/doc/refman/8.4/en/group-by-functional-dependence.html
err = analyzererrors.ErrValidationGroupBy.New(expr.String())
return false
}
}
return true
Expand All @@ -292,22 +310,14 @@ func validateGroupBy(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scop
return n, transform.SameTree, err
}

func expressionReferencesOnlyGroupBys(groupBys []string, expr sql.Expression) bool {
func expressionReferencesOnlyGroupBys(groupBys map[string]bool, expr sql.Expression) bool {
valid := true
sql.Inspect(expr, func(expr sql.Expression) bool {
switch expr := expr.(type) {
case nil, sql.Aggregation, *expression.Literal:
return false
case *expression.Alias, sql.FunctionExpression:
if stringContains(groupBys, expr.String()) {
return false
}
return true
// cc: https://dev.mysql.com/doc/refman/8.0/en/group-by-handling.html
// Each part of the SelectExpr must refer to the aggregated columns in some way
// TODO: this isn't complete, it's overly restrictive. Dependant columns are fine to reference.
default:
if stringContains(groupBys, expr.String()) {
if groupBys[strings.ToLower(expr.String())] {
return false
}

Expand Down
4 changes: 4 additions & 0 deletions sql/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ func (c Column) Copy() *Column {
return &c
}

func (c *Column) String() string {
return c.Source + "." + c.Name
}

// TableId is the unique identifier of a table or table alias in a multi-db environment.
// The long-term goal is to migrate all uses of table name strings to this and minimize places where we
// construct/inspect TableIDs. By treating this as an opaque identifier, it will be easier to migrate to
Expand Down