Skip to content

Commit 80799cd

Browse files
committed
colbuilder: fix recently exposed type schema corruption
This commit fixes recently exposed (or introduced, depending on how you look at this) type schema corruption that can occur when planning filter expressions. In particular, the bug was exactly as the comment deleted in 85fd4fb described:

```
// As an example, consider the following scenario in the context of
// planFilterExpr method:
// 1. r.ColumnTypes={types.Bool} with len=1 and cap=4
// 2. planSelectionOperators adds another types.Int column, so
//    filterColumnTypes={types.Bool, types.Int} with len=2 and cap=4
//    Crucially, it uses exact same underlying array as r.ColumnTypes
//    uses.
// 3. we project out second column, so r.ColumnTypes={types.Bool}
// 4. later, we add another types.Float column, so
//    r.ColumnTypes={types.Bool, types.Float}, but there is enough
//    capacity in the array, so we simply overwrite the second slot
//    with the new type which corrupts filterColumnTypes to become
//    {types.Bool, types.Float}, and we can get into a runtime type
//    mismatch situation.
```

More concretely, in `planFilterExpr` we are using the passed-in type schema to append new types for the intermediate projection operators, and then we create a "simple project op" that removes those intermediate operators. If we later try to add more output columns, we will overwrite types captured by the intermediate projected away operators.

The bug was that the simple project op did not create a new type schema like it's supposed to do. This is now fixed, and we now enforce that the simple project op in `colbuilder` package can only be created by the helper method that explicitly returns the updated type schema, hoping that this will encourage the callers to think about the type schema management to prevent such issues in the future.

Release note: None
1 parent 284b6c0 commit 80799cd

File tree

5 files changed

+138
-50
lines changed

5 files changed

+138
-50
lines changed

pkg/sql/colexec/colbuilder/BUILD.bazel

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
22

33
go_library(
44
name = "colbuilder",
5-
srcs = ["execplan.go"],
5+
srcs = [
6+
"execplan.go",
7+
"execplan_util.go",
8+
],
69
importpath = "github.com/cockroachdb/cockroach/pkg/sql/colexec/colbuilder",
710
visibility = ["//visibility:public"],
811
deps = [

pkg/sql/colexec/colbuilder/execplan.go

Lines changed: 23 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1382,7 +1382,7 @@ func NewColOperator(
13821382

13831383
aggInput := ehj.(colexecop.Operator)
13841384
if len(hgjSpec.JoinOutputColumns) > 0 {
1385-
aggInput = colexecbase.NewSimpleProjectOp(ehj, len(hjOutputTypes), hgjSpec.JoinOutputColumns)
1385+
aggInput, _ = addProjection(aggInput, hjOutputTypes, hgjSpec.JoinOutputColumns)
13861386
}
13871387

13881388
newAggArgs := *newAggArgs
@@ -1668,26 +1668,19 @@ func NewColOperator(
16681668
result.ToClose = append(result.ToClose, c)
16691669
}
16701670

1671+
result.ColumnTypes = append(result.ColumnTypes, returnType)
16711672
if outputColIdx > numInputCols {
1672-
// We want to project out temporary columns (which have been added in
1673-
// between the input columns and output column) as well as include the
1674-
// new output column (which is located after any temporary columns).
1673+
// We want to project out temporary columns (which have been
1674+
// added in between the input columns and output column) as
1675+
// well as include the new output column (which is located
1676+
// after any temporary columns).
16751677
numOutputCols := numInputCols + 1
16761678
projection := make([]uint32, numOutputCols)
16771679
for i := 0; i < numInputCols; i++ {
16781680
projection[i] = uint32(i)
16791681
}
16801682
projection[numInputCols] = uint32(outputColIdx)
1681-
result.Root = colexecbase.NewSimpleProjectOp(result.Root, numOutputCols, projection)
1682-
// We need to allocate a fresh types slice because we'd
1683-
// "corrupt" the existing slice if we were to overwrite
1684-
// numInputCols'th position.
1685-
inputTypes := result.ColumnTypes[:numInputCols]
1686-
result.ColumnTypes = make([]*types.T, numInputCols+1)
1687-
copy(result.ColumnTypes, inputTypes)
1688-
result.ColumnTypes[numInputCols] = returnType
1689-
} else {
1690-
result.ColumnTypes = append(result.ColumnTypes, returnType)
1683+
result.Root, result.ColumnTypes = addProjection(result.Root, result.ColumnTypes, projection)
16911684
}
16921685

16931686
input = result.Root
@@ -1818,8 +1811,8 @@ func (r opResult) planAndMaybeWrapFilter(
18181811
filter execinfrapb.Expression,
18191812
factory coldata.ColumnFactory,
18201813
) error {
1821-
op, err := planFilterExpr(
1822-
ctx, flowCtx, r.Root, r.ColumnTypes, filter, args.StreamingMemAccount, factory, args.ExprHelper, &r.Releasables,
1814+
err := r.planFilterExpr(
1815+
ctx, flowCtx, filter, args.StreamingMemAccount, factory, args.ExprHelper,
18231816
)
18241817
if err != nil {
18251818
// Filter expression planning failed. Fall back to planning the filter
@@ -1837,7 +1830,6 @@ func (r opResult) planAndMaybeWrapFilter(
18371830
processorID, factory, err,
18381831
)
18391832
}
1840-
r.Root = op
18411833
return nil
18421834
}
18431835

@@ -1963,12 +1955,7 @@ func (r *postProcessResult) planPostProcessSpec(
19631955
}
19641956
renderedCols = append(renderedCols, uint32(outputIdx))
19651957
}
1966-
r.Op = colexecbase.NewSimpleProjectOp(r.Op, len(r.ColumnTypes), renderedCols)
1967-
newTypes := make([]*types.T, len(renderedCols))
1968-
for i, j := range renderedCols {
1969-
newTypes[i] = r.ColumnTypes[j]
1970-
}
1971-
r.ColumnTypes = newTypes
1958+
r.Op, r.ColumnTypes = addProjection(r.Op, r.ColumnTypes, renderedCols)
19721959
}
19731960
if post.Offset != 0 {
19741961
r.Op = colexec.NewOffsetOp(r.Op, post.Offset)
@@ -2030,54 +2017,41 @@ func (r opResult) finishScanPlanning(op colfetcher.ScanOperator, resultTypes []*
20302017
}
20312018

20322019
// planFilterExpr creates all operators to implement filter expression.
2033-
func planFilterExpr(
2020+
func (r opResult) planFilterExpr(
20342021
ctx context.Context,
20352022
flowCtx *execinfra.FlowCtx,
2036-
input colexecop.Operator,
2037-
columnTypes []*types.T,
20382023
filter execinfrapb.Expression,
20392024
acc *mon.BoundAccount,
20402025
factory coldata.ColumnFactory,
20412026
helper *colexecargs.ExprHelper,
2042-
releasables *[]execreleasable.Releasable,
2043-
) (colexecop.Operator, error) {
2044-
expr, err := helper.ProcessExpr(ctx, filter, flowCtx.EvalCtx, columnTypes)
2027+
) error {
2028+
expr, err := helper.ProcessExpr(ctx, filter, flowCtx.EvalCtx, r.ColumnTypes)
20452029
if err != nil {
2046-
return nil, err
2030+
return err
20472031
}
20482032
if expr == tree.DNull {
20492033
// The filter expression is tree.DNull meaning that it is always false, so
20502034
// we put a zero operator.
2051-
return colexecutils.NewZeroOp(input), nil
2035+
r.Root = colexecutils.NewZeroOp(r.Root)
2036+
return nil
20522037
}
20532038
op, _, filterColumnTypes, err := planSelectionOperators(
2054-
ctx, flowCtx.EvalCtx, expr, columnTypes, input, acc, factory, releasables,
2039+
ctx, flowCtx.EvalCtx, expr, r.ColumnTypes, r.Root, acc, factory, &r.Releasables,
20552040
)
20562041
if err != nil {
2057-
return nil, errors.Wrapf(err, "unable to columnarize filter expression %q", filter)
2042+
return errors.Wrapf(err, "unable to columnarize filter expression %q", filter)
20582043
}
2059-
if len(filterColumnTypes) > len(columnTypes) {
2044+
r.Root = op
2045+
if len(filterColumnTypes) > len(r.ColumnTypes) {
20602046
// Additional columns were appended to store projections while
20612047
// evaluating the filter. Project them away.
20622048
var outputColumns []uint32
2063-
for i := range columnTypes {
2049+
for i := range r.ColumnTypes {
20642050
outputColumns = append(outputColumns, uint32(i))
20652051
}
2066-
op = colexecbase.NewSimpleProjectOp(op, len(filterColumnTypes), outputColumns)
2052+
r.Root, r.ColumnTypes = addProjection(r.Root, filterColumnTypes, outputColumns)
20672053
}
2068-
return op, nil
2069-
}
2070-
2071-
// addProjection adds a simple projection on top of op according to projection
2072-
// and returns the updated operator and type schema.
2073-
func addProjection(
2074-
op colexecop.Operator, typs []*types.T, projection []uint32,
2075-
) (colexecop.Operator, []*types.T) {
2076-
newTypes := make([]*types.T, len(projection))
2077-
for i, j := range projection {
2078-
newTypes[i] = typs[j]
2079-
}
2080-
return colexecbase.NewSimpleProjectOp(op, len(typs), projection), newTypes
2054+
return nil
20812055
}
20822056

20832057
func examineLikeOp(op treecmp.ComparisonOperator) (negate bool, caseInsensitive bool) {
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright 2023 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the Business Source License
4+
// included in the file licenses/BSL.txt.
5+
//
6+
// As of the Change Date specified in that file, in accordance with
7+
// the Business Source License, use of this software will be governed
8+
// by the Apache License, Version 2.0, included in the file
9+
// licenses/APL.txt.
10+
11+
package colbuilder
12+
13+
import (
14+
"github.com/cockroachdb/cockroach/pkg/sql/colexec/colexecbase"
15+
"github.com/cockroachdb/cockroach/pkg/sql/colexecop"
16+
"github.com/cockroachdb/cockroach/pkg/sql/types"
17+
)
18+
19+
// addProjection adds a simple projection on top of op according to projection
20+
// and returns the updated operator and type schema.
21+
//
22+
// Note that this method is the only place that's allowed to create a simple
23+
// project op in colbuilder package (enforced by the linter) in order to force
24+
// the caller to think about the type schema to prevent type schema corruption
25+
// issues like #47889 and #107615.
26+
func addProjection(
27+
op colexecop.Operator, typs []*types.T, projection []uint32,
28+
) (colexecop.Operator, []*types.T) {
29+
newTypes := make([]*types.T, len(projection))
30+
for i, j := range projection {
31+
newTypes[i] = typs[j]
32+
}
33+
return colexecbase.NewSimpleProjectOp(op, len(typs), projection), newTypes
34+
}

pkg/sql/logictest/testdata/logic_test/vectorize_types

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,45 @@ INSERT INTO t64676 VALUES
136136

137137
statement ok
138138
SELECT i + d FROM t64676
139+
140+
# Regression test for type schema corruption when planning filter expressions
141+
# (#107615).
142+
statement ok
143+
CREATE TABLE t107615 AS
144+
SELECT
145+
g::INT2 AS _int2,
146+
g::INT4 AS _int4,
147+
g::INT8 AS _int8,
148+
g::FLOAT8 AS _float8,
149+
'2001-01-01'::DATE + g AS _date,
150+
'2001-01-01'::TIMESTAMP + g * '1 day'::INTERVAL AS _timestamp,
151+
'2001-01-01'::TIMESTAMPTZ + g * '1 day'::INTERVAL AS _timestamptz,
152+
g * '1 day'::INTERVAL AS _interval,
153+
g % 2 = 1 AS _bool,
154+
g::DECIMAL AS _decimal,
155+
g::STRING AS _string,
156+
g::STRING::BYTES AS _bytes,
157+
substring('00000000-0000-0000-0000-' || g::STRING || '00000000000', 1, 36)::UUID AS _uuid
158+
FROM
159+
generate_series(1, 5) AS g;
160+
SET testing_optimizer_random_seed = 4478711114964600496;
161+
SELECT
162+
1.2345678901234564e+23:::FLOAT8,
163+
_string,
164+
_int2,
165+
tableoid,
166+
_int2,
167+
'1942-08-15 21:13:20+00':::TIMESTAMPTZ,
168+
'\xc3a0':::BYTES,
169+
true,
170+
_timestamp,
171+
_date,
172+
e'\x01':::STRING,
173+
_uuid,
174+
'{"test": "json"}':::JSONB,
175+
_int4,
176+
_interval
177+
FROM
178+
t107615
179+
WHERE
180+
(_bool OR (NOT _bool));

pkg/testutils/lint/lint_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2025,6 +2025,41 @@ func TestLint(t *testing.T) {
20252025
}
20262026
})
20272027

2028+
t.Run("TestColbuilderSimpleProject", func(t *testing.T) {
2029+
t.Parallel()
2030+
cmd, stderr, filter, err := dirCmd(
2031+
pkgDir,
2032+
"git",
2033+
"grep",
2034+
"-nE",
2035+
// We prohibit usage of colexecbase.NewSimpleProjectOp outside of
2036+
// addProjection helper in colbuilder package.
2037+
`colexecbase\.NewSimpleProjectOp`,
2038+
"--",
2039+
"sql/colexec/colbuilder*",
2040+
":!sql/colexec/colbuilder/execplan_util.go",
2041+
)
2042+
if err != nil {
2043+
t.Fatal(err)
2044+
}
2045+
2046+
if err := cmd.Start(); err != nil {
2047+
t.Fatal(err)
2048+
}
2049+
2050+
if err := stream.ForEach(filter, func(s string) {
2051+
t.Errorf("\n%s <- forbidden; use addProjection to prevent type schema corruption", s)
2052+
}); err != nil {
2053+
t.Error(err)
2054+
}
2055+
2056+
if err := cmd.Wait(); err != nil {
2057+
if out := stderr.String(); len(out) > 0 {
2058+
t.Fatalf("err=%s, stderr=%s", err, out)
2059+
}
2060+
}
2061+
})
2062+
20282063
t.Run("TestGCAssert", func(t *testing.T) {
20292064
skip.UnderShort(t)
20302065

0 commit comments

Comments
 (0)