Skip to content

Commit 9dcdbcd

Browse files
committed
opt: fix error with DISTINCT ON and ORDER BY ASC NULLS LAST
This commit fixes an error that could occur in the optbuilder when planning a query with DISTINCT ON and a non-standard nulls ordering. The optimizer supports queries with a non-standard nulls ordering by projecting a column with the expression (col IS NULL) and adding it to the ordering. Since we require that DISTINCT ON columns must be the prefix of any ordering columns, we must account for the new ordering column when building DISTINCT ON. A previous bug fix for cockroachdb#90763 caused the new column to be simply ignored when building DISTINCT ON, but this was insufficient. We need to actually include the new column among the DISTINCT ON columns. This commit makes that change. Fixes cockroachdb#107839 Release note (bug fix): Fixed a spurious error "no data source matches prefix" that could occur during planning for a query with DISTINCT ON and ORDER BY ASC NULLS LAST or ORDER BY DESC NULLS FIRST.
1 parent 898c592 commit 9dcdbcd

File tree

3 files changed

+123
-31
lines changed

3 files changed

+123
-31
lines changed

pkg/sql/logictest/testdata/logic_test/distinct_on

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,3 +405,32 @@ Action
405405
Biography
406406
Crime
407407
NULL
408+
409+
# Regression test for #107839. More fixes for NULLS FIRST/LAST with DISTINCT ON.
410+
statement ok
411+
CREATE TABLE t1 (id int, str text);
412+
CREATE TABLE t2 (id int, num int);
413+
INSERT INTO t1 VALUES (1, 'hello'), (2, NULL);
414+
INSERT INTO t2 VALUES (1, 1), (2, 2), (NULL, NULL)
415+
416+
query II
417+
SELECT
418+
DISTINCT ON (t2.id)
419+
t2.*
420+
FROM t1, t2
421+
ORDER BY t2.id DESC NULLS FIRST
422+
----
423+
NULL NULL
424+
2 2
425+
1 1
426+
427+
query II
428+
SELECT
429+
DISTINCT ON (t2.id)
430+
t2.*
431+
FROM t1, t2
432+
ORDER BY t2.id ASC NULLS LAST
433+
----
434+
1 1
435+
2 2
436+
NULL NULL

pkg/sql/opt/optbuilder/distinct.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,23 +89,29 @@ func (b *Builder) buildDistinctOn(
8989
var seen opt.ColSet
9090
for _, col := range inScope.ordering {
9191
if !distinctOnCols.Contains(col.ID()) {
92+
colIsValid := false
9293
scopeCol := inScope.getColumn(col.ID())
9394
if scopeCol != nil {
9495
if isExpr, ok := scopeCol.scalar.(*memo.IsExpr); ok {
9596
if _, ok := isExpr.Right.(*memo.NullExpr); ok {
9697
if v, ok := isExpr.Left.(*memo.VariableExpr); ok {
9798
if distinctOnCols.Contains(v.Col) {
9899
// We have a col IS NULL expression (case 3 above).
99-
continue
100+
// Add the new column to distinctOnCols, since it doesn't change
101+
// the semantics of the DISTINCT ON.
102+
distinctOnCols.Add(col.ID())
103+
colIsValid = true
100104
}
101105
}
102106
}
103107
}
104108
}
105-
panic(pgerror.Newf(
106-
pgcode.InvalidColumnReference,
107-
"SELECT DISTINCT ON expressions must match initial ORDER BY expressions",
108-
))
109+
if !colIsValid {
110+
panic(pgerror.Newf(
111+
pgcode.InvalidColumnReference,
112+
"SELECT DISTINCT ON expressions must match initial ORDER BY expressions",
113+
))
114+
}
109115
}
110116
seen.Add(col.ID())
111117
if seen.Equals(distinctOnCols) {

pkg/sql/opt/optbuilder/testdata/distinct_on

Lines changed: 83 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -507,30 +507,87 @@ ORDER BY
507507
"genre" ASC NULLS LAST,
508508
"id" ASC NULLS LAST
509509
----
510+
distinct-on
511+
├── columns: id:1!null name:2 genre:3 [hidden: nulls_ordering_genre:6!null]
512+
├── grouping columns: genre:3 nulls_ordering_genre:6!null
513+
├── internal-ordering: +7,+1 opt(3,6)
514+
├── ordering: +6,+3
515+
├── sort
516+
│ ├── columns: id:1!null name:2 genre:3 nulls_ordering_genre:6!null nulls_ordering_id:7!null
517+
│ ├── ordering: +6,+3,+7,+1
518+
│ └── project
519+
│ ├── columns: nulls_ordering_genre:6!null nulls_ordering_id:7!null id:1!null name:2 genre:3
520+
│ ├── scan author
521+
│ │ └── columns: id:1!null name:2 genre:3 crdb_internal_mvcc_timestamp:4 tableoid:5
522+
│ └── projections
523+
│ ├── genre:3 IS NULL [as=nulls_ordering_genre:6]
524+
│ └── id:1 IS NULL [as=nulls_ordering_id:7]
525+
└── aggregations
526+
├── first-agg [as=id:1]
527+
│ └── id:1
528+
└── first-agg [as=name:2]
529+
└── name:2
530+
531+
# Regression test for #107839. More fixes for NULLS FIRST/LAST with DISTINCT ON.
532+
533+
exec-ddl
534+
CREATE TABLE t1 (id int, str text)
535+
----
536+
537+
exec-ddl
538+
CREATE TABLE t2 (id int, num int)
539+
----
540+
541+
build
542+
SELECT
543+
DISTINCT ON (t2.id)
544+
t1.id,
545+
t1.str
546+
FROM t1 JOIN t2 ON t1.id = t2.id
547+
ORDER BY t2.id DESC NULLS FIRST
548+
----
510549
sort
511-
├── columns: id:1!null name:2 genre:3 [hidden: nulls_ordering_genre:8!null nulls_ordering_id:9!null]
512-
├── ordering: +8,+3,+9,+1
513-
└── project
514-
├── columns: nulls_ordering_genre:8!null nulls_ordering_id:9!null id:1!null name:2 genre:3
515-
├── distinct-on
516-
│ ├── columns: id:1!null name:2 genre:3
517-
│ ├── grouping columns: genre:3
518-
│ ├── internal-ordering: +6,+7,+1 opt(3)
519-
│ ├── sort
520-
│ │ ├── columns: id:1!null name:2 genre:3 nulls_ordering_genre:6!null nulls_ordering_id:7!null
521-
│ │ ├── ordering: +7,+1 opt(3,6)
522-
│ │ └── project
523-
│ │ ├── columns: nulls_ordering_genre:6!null nulls_ordering_id:7!null id:1!null name:2 genre:3
524-
│ │ ├── scan author
525-
│ │ │ └── columns: id:1!null name:2 genre:3 crdb_internal_mvcc_timestamp:4 tableoid:5
526-
│ │ └── projections
527-
│ │ ├── genre:3 IS NULL [as=nulls_ordering_genre:6]
528-
│ │ └── id:1 IS NULL [as=nulls_ordering_id:7]
529-
│ └── aggregations
530-
│ ├── first-agg [as=id:1]
531-
│ │ └── id:1
532-
│ └── first-agg [as=name:2]
533-
│ └── name:2
534-
└── projections
535-
├── genre:3 IS NULL [as=nulls_ordering_genre:8]
536-
└── id:1 IS NULL [as=nulls_ordering_id:9]
550+
├── columns: id:1!null str:2 [hidden: t2.id:6!null nulls_ordering_id:11!null]
551+
├── ordering: -11,-6
552+
└── distinct-on
553+
├── columns: t1.id:1!null str:2 t2.id:6!null nulls_ordering_id:11!null
554+
├── grouping columns: t2.id:6!null nulls_ordering_id:11!null
555+
├── project
556+
│ ├── columns: nulls_ordering_id:11!null t1.id:1!null str:2 t2.id:6!null
557+
│ ├── inner-join (hash)
558+
│ │ ├── columns: t1.id:1!null str:2 t1.rowid:3!null t1.crdb_internal_mvcc_timestamp:4 t1.tableoid:5 t2.id:6!null num:7 t2.rowid:8!null t2.crdb_internal_mvcc_timestamp:9 t2.tableoid:10
559+
│ │ ├── scan t1
560+
│ │ │ └── columns: t1.id:1 str:2 t1.rowid:3!null t1.crdb_internal_mvcc_timestamp:4 t1.tableoid:5
561+
│ │ ├── scan t2
562+
│ │ │ └── columns: t2.id:6 num:7 t2.rowid:8!null t2.crdb_internal_mvcc_timestamp:9 t2.tableoid:10
563+
│ │ └── filters
564+
│ │ └── t1.id:1 = t2.id:6
565+
│ └── projections
566+
│ └── t2.id:6 IS NULL [as=nulls_ordering_id:11]
567+
└── aggregations
568+
├── first-agg [as=t1.id:1]
569+
│ └── t1.id:1
570+
└── first-agg [as=str:2]
571+
└── str:2
572+
573+
# Because of the hack we use to allow ORDER BY NULLS FIRST / LAST with DISTINCT
574+
# ON, we also support this query. Postgres doesn't support it, but it doesn't
575+
# seem like a problem that we do.
576+
build
577+
SELECT DISTINCT ON (id) * FROM t1 ORDER BY id IS NULL, id
578+
----
579+
sort
580+
├── columns: id:1 str:2 [hidden: column6:6!null]
581+
├── ordering: +6,+1
582+
└── distinct-on
583+
├── columns: id:1 str:2 column6:6!null
584+
├── grouping columns: id:1 column6:6!null
585+
├── project
586+
│ ├── columns: column6:6!null id:1 str:2
587+
│ ├── scan t1
588+
│ │ └── columns: id:1 str:2 rowid:3!null crdb_internal_mvcc_timestamp:4 tableoid:5
589+
│ └── projections
590+
│ └── id:1 IS NULL [as=column6:6]
591+
└── aggregations
592+
└── first-agg [as=str:2]
593+
└── str:2

0 commit comments

Comments
 (0)