Skip to content

Commit fdb713a

Browse files
craig[bot] and paulniziolek committed
Merge #147045
147045: sql: add support for like with collated columns r=paulniziolek a=paulniziolek

#### sql: add support for like with collated columns

Fixes: #20666
Epic: CRDB-5918

Release note (sql change): Deterministic collations are now supported with LIKE. A deterministic collation considers strings to be equal only if they consist of the same byte sequence.

Co-authored-by: Paul Niziolek <[email protected]>
2 parents e111667 + 785c6ed commit fdb713a

File tree

12 files changed: 185 additions & 18 deletions

docs/generated/sql/operators.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,7 @@
513513
<table><thead>
514514
<tr><td><code>LIKE</code></td><td>Return</td></tr>
515515
</thead><tbody>
516+
<tr><td><a href="collate.html">collatedstring</a> <code>LIKE</code> <a href="collate.html">collatedstring</a></td><td><a href="bool.html">bool</a></td></tr>
516517
<tr><td><a href="string.html">string</a> <code>LIKE</code> <a href="string.html">string</a></td><td><a href="bool.html">bool</a></td></tr>
517518
</tbody></table>
518519
<table><thead>

pkg/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,7 @@ ALL_TESTS = [
733733
"//pkg/util/cidr:cidr_test",
734734
"//pkg/util/circuit:circuit_test",
735735
"//pkg/util/cloudinfo:cloudinfo_test",
736+
"//pkg/util/collatedstring:collatedstring_test",
736737
"//pkg/util/container/heap:heap_test",
737738
"//pkg/util/container/list:list_test",
738739
"//pkg/util/container/ring:ring_test",
@@ -2577,6 +2578,7 @@ GO_TARGETS = [
25772578
"//pkg/util/cloudinfo:cloudinfo",
25782579
"//pkg/util/cloudinfo:cloudinfo_test",
25792580
"//pkg/util/collatedstring:collatedstring",
2581+
"//pkg/util/collatedstring:collatedstring_test",
25802582
"//pkg/util/container/heap:heap",
25812583
"//pkg/util/container/heap:heap_test",
25822584
"//pkg/util/container/list:list",

pkg/sql/logictest/testdata/logic_test/collatedstring

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -762,3 +762,9 @@ SELECT b COLLATE "en_US_u_ks_level2" FROM str_arr ORDER BY 1
762762
NULL
763763
d
764764
h
765+
766+
query error unsupported comparison operator: <collatedstring{en_US}> LIKE <collatedstring{de_DE}>
767+
SELECT 'TEST' COLLATE "en_US" LIKE 'TEST' COLLATE "de_DE"
768+
769+
query error nondeterministic collations are not supported for LIKE
770+
SELECT 'TEST' COLLATE "en_US-u-ks-level1" LIKE 'TEST' COLLATE "en_US-u-ks-level1"

pkg/sql/pgwire/pgwire_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ func TestPGPreparedQuery(t *testing.T) {
478478
baseTest.SetArgs(2, 3).Results(2),
479479
baseTest.SetArgs(true, 0).Error(`error in argument for \$1: could not parse "true" as type int: strconv.ParseInt: parsing "true": invalid syntax`),
480480
}},
481-
{"SELECT $1[2] LIKE 'b'", []preparedQueryTest{
481+
{"SELECT ($1::TEXT[])[2] LIKE 'b'", []preparedQueryTest{
482482
baseTest.SetArgs(pq.Array([]string{"a", "b", "c"})).Results(true),
483483
baseTest.SetArgs(pq.Array([]gosql.NullString{{String: "a", Valid: true}, {Valid: false}, {String: "c", Valid: true}})).Results(gosql.NullBool{Valid: false}),
484484
}},

pkg/sql/rowexec/sampler_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ func TestSamplerSketch(t *testing.T) {
308308
},
309309
cardinalities: []int{3, 2, 3},
310310
numNulls: []int{1, 1, 1},
311-
size: []int{352, 352, 704},
311+
size: []int{384, 384, 768},
312312
keySize: []int{89, 89, 178},
313313
valueSize: []int{21, 21, 42},
314314
},

pkg/sql/sem/eval/match.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,17 @@ func matchLike(ctx *Context, left, right tree.Datum, caseInsensitive bool) (tree
8181
if left == tree.DNull || right == tree.DNull {
8282
return tree.DNull, nil
8383
}
84-
s, pattern := string(tree.MustBeDString(left)), string(tree.MustBeDString(right))
84+
var s, pattern string
85+
var err error
86+
s, err = matchStringFromDatum(left)
87+
if err != nil {
88+
return tree.DBoolFalse, err
89+
}
90+
pattern, err = matchStringFromDatum(right)
91+
if err != nil {
92+
return tree.DBoolFalse, err
93+
}
94+
8595
if len(s) == 0 {
8696
// An empty string only matches with an empty pattern or a pattern
8797
// consisting only of '%'. To match PostgreSQL's behavior, we have a
@@ -113,6 +123,18 @@ func matchLike(ctx *Context, left, right tree.Datum, caseInsensitive bool) (tree
113123
return tree.MakeDBool(tree.DBool(matches)), err
114124
}
115125

126+
func matchStringFromDatum(datum tree.Datum) (string, error) {
127+
switch d := datum.(type) {
128+
case *tree.DCollatedString:
129+
if !d.Deterministic {
130+
return "", pgerror.New(pgcode.FeatureNotSupported, "nondeterministic collations are not supported for LIKE")
131+
}
132+
return d.Contents, nil
133+
default:
134+
return string(tree.MustBeDString(d)), nil
135+
}
136+
}
137+
116138
func matchRegexpWithKey(ctx *Context, str tree.Datum, key tree.RegexpCacheKey) (tree.Datum, error) {
117139
re, err := ctx.ReCache.GetRegexp(key)
118140
if err != nil {

pkg/sql/sem/eval/testdata/eval/like

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1350,3 +1350,43 @@ eval
13501350
'TEST' NOT ILIKE 'TE_'
13511351
----
13521352
true
1353+
1354+
eval
1355+
'TEST' COLLATE "en_US" LIKE 'TEST' COLLATE "en_US"
1356+
----
1357+
true
1358+
1359+
eval
1360+
'TEST' COLLATE "en_US" LIKE 'TEST'
1361+
----
1362+
true
1363+
1364+
eval
1365+
'TEST' LIKE 'TEST' COLLATE "en_US"
1366+
----
1367+
true
1368+
1369+
eval
1370+
'TEST' COLLATE "en_US" LIKE 'TESTER' COLLATE "en_US"
1371+
----
1372+
false
1373+
1374+
eval
1375+
'TEST' COLLATE "en_US" LIKE '_ES%'
1376+
----
1377+
true
1378+
1379+
eval
1380+
'TEST' COLLATE "en_US" LIKE 'test' COLLATE "de_DE"
1381+
----
1382+
unsupported comparison operator: <collatedstring{en_US}> LIKE <collatedstring{de_DE}>
1383+
1384+
eval
1385+
'TEST' COLLATE "en_US_u-ks-level1" LIKE 'test' COLLATE "en_US-u-ks-level1"
1386+
----
1387+
nondeterministic collations are not supported for LIKE
1388+
1389+
eval
1390+
'TEST' COLLATE "en_US-ks-level1" LIKE 'test' COLLATE "en_US-ks-level1"
1391+
----
1392+
invalid locale en_US-ks-level1: language: tag is not well-formed

pkg/sql/sem/tree/datum.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"github.com/cockroachdb/cockroach/pkg/sql/types"
3333
"github.com/cockroachdb/cockroach/pkg/util"
3434
"github.com/cockroachdb/cockroach/pkg/util/bitarray"
35+
"github.com/cockroachdb/cockroach/pkg/util/collatedstring"
3536
"github.com/cockroachdb/cockroach/pkg/util/duration"
3637
"github.com/cockroachdb/cockroach/pkg/util/encoding"
3738
"github.com/cockroachdb/cockroach/pkg/util/ipaddr"
@@ -1322,7 +1323,8 @@ type DCollatedString struct {
13221323
Contents string
13231324
Locale string
13241325
// Key is the collation key.
1325-
Key []byte
1326+
Key []byte
1327+
Deterministic bool
13261328
}
13271329

13281330
// CollationEnvironment stores the state needed by NewDCollatedString to
@@ -1336,7 +1338,8 @@ type collationEnvironmentCacheEntry struct {
13361338
// locale is interned.
13371339
locale string
13381340
// collator is an expensive factory.
1339-
collator *collate.Collator
1341+
collator *collate.Collator
1342+
deterministic bool
13401343
}
13411344

13421345
func (env *CollationEnvironment) getCacheEntry(
@@ -1353,7 +1356,7 @@ func (env *CollationEnvironment) getCacheEntry(
13531356
return collationEnvironmentCacheEntry{}, err
13541357
}
13551358

1356-
entry = collationEnvironmentCacheEntry{locale, collate.New(tag)}
1359+
entry = collationEnvironmentCacheEntry{locale, collate.New(tag), collatedstring.IsDeterministicCollation(tag)}
13571360
env.cache[locale] = entry
13581361
}
13591362
return entry, nil
@@ -1372,7 +1375,7 @@ func NewDCollatedString(
13721375
env.buffer = &collate.Buffer{}
13731376
}
13741377
key := entry.collator.KeyFromString(env.buffer, contents)
1375-
d := DCollatedString{contents, entry.locale, make([]byte, len(key))}
1378+
d := DCollatedString{contents, entry.locale, make([]byte, len(key)), entry.deterministic}
13761379
copy(d.Key, key)
13771380
env.buffer.Reset()
13781381
return &d, nil
@@ -1445,7 +1448,7 @@ func (d *DCollatedString) IsMin(ctx context.Context, cmpCtx CompareContext) bool
14451448

14461449
// Min implements the Datum interface.
14471450
func (d *DCollatedString) Min(ctx context.Context, cmpCtx CompareContext) (Datum, bool) {
1448-
return &DCollatedString{"", d.Locale, nil}, true
1451+
return &DCollatedString{"", d.Locale, nil, false}, true
14491452
}
14501453

14511454
// Max implements the Datum interface.
@@ -6204,7 +6207,7 @@ var baseDatumTypeSizes = map[types.Family]struct {
62046207
types.FloatFamily: {unsafe.Sizeof(DFloat(0.0)), fixedSize},
62056208
types.DecimalFamily: {unsafe.Sizeof(DDecimal{}), variableSize},
62066209
types.StringFamily: {unsafe.Sizeof(DString("")), variableSize},
6207-
types.CollatedStringFamily: {unsafe.Sizeof(DCollatedString{"", "", nil}), variableSize},
6210+
types.CollatedStringFamily: {unsafe.Sizeof(DCollatedString{"", "", nil, false}), variableSize},
62086211
types.BytesFamily: {unsafe.Sizeof(DBytes("")), variableSize},
62096212
types.EncodedKeyFamily: {unsafe.Sizeof(DBytes("")), variableSize},
62106213
types.DateFamily: {unsafe.Sizeof(DDate{}), fixedSize},

pkg/sql/sem/tree/eval.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1926,12 +1926,9 @@ var CmpOps = cmpOpFixups(map[treecmp.ComparisonOperatorSymbol]*CmpOpOverloads{
19261926
}},
19271927

19281928
treecmp.Like: {overloads: []*CmpOp{
1929-
{
1930-
LeftType: types.String,
1931-
RightType: types.String,
1932-
EvalOp: &MatchLikeOp{CaseInsensitive: false},
1933-
Volatility: volatility.Leakproof,
1934-
},
1929+
// TODO(mgartner): This overload should be immutable, not leakproof.
1930+
makeLikeFn(types.String, types.String, volatility.Leakproof),
1931+
makeLikeFn(types.AnyCollatedString, types.AnyCollatedString, volatility.Immutable),
19351932
}},
19361933

19371934
treecmp.ILike: {overloads: []*CmpOp{
@@ -2148,6 +2145,15 @@ func makeEvalTupleIn(typ *types.T, v volatility.V) *CmpOp {
21482145
}
21492146
}
21502147

2148+
func makeLikeFn(a, b *types.T, v volatility.V) *CmpOp {
2149+
return &CmpOp{
2150+
LeftType: a,
2151+
RightType: b,
2152+
EvalOp: &MatchLikeOp{CaseInsensitive: false},
2153+
Volatility: v,
2154+
}
2155+
}
2156+
21512157
// MultipleResultsError is returned by QueryRow when more than one result is
21522158
// encountered.
21532159
type MultipleResultsError struct {

pkg/util/collatedstring/BUILD.bazel

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,22 @@
1-
load("@io_bazel_rules_go//go:def.bzl", "go_library")
1+
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
22

33
go_library(
44
name = "collatedstring",
55
srcs = ["collatedstring.go"],
66
importpath = "github.com/cockroachdb/cockroach/pkg/util/collatedstring",
77
visibility = ["//visibility:public"],
8-
deps = ["@org_golang_x_text//collate"],
8+
deps = [
9+
"@org_golang_x_text//collate",
10+
"@org_golang_x_text//language",
11+
],
12+
)
13+
14+
go_test(
15+
name = "collatedstring_test",
16+
srcs = ["collatedstring_test.go"],
17+
embed = [":collatedstring"],
18+
deps = [
19+
"@com_github_stretchr_testify//require",
20+
"@org_golang_x_text//language",
21+
],
922
)

0 commit comments

Comments
 (0)