Skip to content

Commit c55d33b

Browse files
committed
sql: add LTREE valueside encoding
LTREE datums are value-side encoded using their byte representation, including the path separators.

Informs: #44657
Epic: CRDB-148
Release note: None
1 parent c0c106f commit c55d33b

File tree

10 files changed

+172
-1
lines changed

10 files changed

+172
-1
lines changed

pkg/sql/catalog/colinfo/col_type_info.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,10 @@ func MustBeValueEncoded(semanticType *types.T) bool {
230230
return true
231231
case types.PGVectorFamily:
232232
return true
233+
case types.LTreeFamily:
234+
// TODO(paulniziolek): LTreeFamily should be supported in keyside encoding.
235+
// Temporarily, we disallow it, until implemented.
236+
return true
233237
}
234238
return false
235239
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
statement ok
2+
CREATE TABLE l (lt LTREE);
3+
4+
statement ok
5+
CREATE TABLE la (lta LTREE[]);
6+
7+
statement ok
8+
INSERT INTO l VALUES ('A'), ('A.B'), ('A.B.C'), ('A.B.D'), ('Z');
9+
10+
statement ok
11+
INSERT INTO la VALUES (ARRAY['A', 'A.B']), (ARRAY['A.B.C', 'A.B.D', 'Z']), (ARRAY['X', 'Y']), (ARRAY[]), (ARRAY['']);
12+
13+
query T
14+
SELECT * FROM l ORDER BY lt;
15+
----
16+
A
17+
A.B
18+
A.B.C
19+
A.B.D
20+
Z
21+
22+
query T
23+
SELECT * FROM la ORDER BY lta;
24+
----
25+
{}
26+
{""}
27+
{A,A.B}
28+
{A.B.C,A.B.D,Z}
29+
{X,Y}
30+
31+
query T
32+
SELECT pg_typeof(lt) FROM l LIMIT 1;
33+
----
34+
ltree
35+
36+
query T
37+
SELECT pg_typeof(lta) FROM la LIMIT 1;
38+
----
39+
ltree[]
40+
41+
query error label length is 1001, must be at most 1000
42+
INSERT INTO l VALUES (repeat('A', 1001)::LTREE)
43+
44+
query error number of ltree labels \(65536\) exceeds the maximum allowed \(65535\)
45+
INSERT INTO l VALUES ((SELECT string_agg('A', '.') FROM generate_series(1, 65536))::LTREE)
46+
47+
query B
48+
SELECT 'A.B.C'::LTREE = 'A.B.C'
49+
----
50+
true
51+
52+
query B
53+
SELECT 'A.B.C'::LTREE = 'A.B'
54+
----
55+
false
56+
57+
query B
58+
SELECT 'A.B.C'::LTREE < 'A.B'
59+
----
60+
false
61+
62+
query B
63+
SELECT 'A.B'::LTREE < 'A.B.C'
64+
----
65+
true
66+
67+
query B
68+
SELECT ARRAY['A', 'A.B']::LTREE[] = ARRAY['A', 'A.B']
69+
----
70+
true
71+
72+
query B
73+
SELECT ARRAY['A', 'A.B']::LTREE[] = ARRAY['A.B', 'A']
74+
----
75+
false
76+
77+
query B
78+
SELECT ARRAY['A', 'A.B']::LTREE[] < ARRAY['A', 'A.B.C']
79+
----
80+
true

pkg/sql/rowenc/encoded_datum_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,10 @@ func TestEncDatumCompare(t *testing.T) {
218218
case types.AnyFamily, types.UnknownFamily, types.ArrayFamily, types.JsonFamily, types.TupleFamily, types.VoidFamily,
219219
types.TSQueryFamily, types.TSVectorFamily, types.PGVectorFamily, types.TriggerFamily, types.JsonpathFamily:
220220
continue
221+
case types.LTreeFamily:
222+
// TODO(paulniziolek): Temporarily skip LTrees as they are
223+
// currently missing keyside indexing support.
224+
continue
221225
case types.CollatedStringFamily:
222226
typ = types.MakeCollatedString(types.String, *randgen.RandCollationLocale(rng))
223227
}

pkg/sql/rowenc/index_encoding_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,11 @@ func TestEncodeContainingArrayInvertedIndexSpans(t *testing.T) {
587587
if typ.ArrayContents().Family() == types.JsonpathFamily {
588588
continue
589589
}
590+
// TODO(paulniziolek): Temporarily skip arrays with LTREEs as they are
591+
// currently missing keyside indexing support.
592+
if typ.ArrayContents().Family() == types.LTreeFamily {
593+
continue
594+
}
590595

591596
// Generate two random arrays and evaluate the result of `left @> right`.
592597
left := randgen.RandArray(rng, typ, 0 /* nullChance */)
@@ -729,6 +734,12 @@ func TestEncodeContainedArrayInvertedIndexSpans(t *testing.T) {
729734
for i := 0; i < 100; i++ {
730735
typ := randgen.RandArrayType(rng)
731736

737+
// TODO(paulniziolek): Temporarily skip arrays with LTREEs as they are
738+
// currently missing keyside indexing support.
739+
if typ.ArrayContents().Family() == types.LTreeFamily {
740+
continue
741+
}
742+
732743
// Generate two random arrays and evaluate the result of `left <@ right`.
733744
left := randgen.RandArray(rng, typ, 0 /* nullChance */)
734745
right := randgen.RandArray(rng, typ, 0 /* nullChance */)
@@ -973,6 +984,11 @@ func TestEncodeOverlapsArrayInvertedIndexSpans(t *testing.T) {
973984
if typ.ArrayContents().Family() == types.JsonpathFamily {
974985
continue
975986
}
987+
// TODO(paulniziolek): Temporarily skip arrays with LTREEs as they are
988+
// currently missing keyside indexing support.
989+
if typ.ArrayContents().Family() == types.LTreeFamily {
990+
continue
991+
}
976992

977993
// Generate two random arrays and evaluate the result of `left && right`.
978994
// Using 1/9th as the Null Chance to generate arrays with a small

pkg/sql/rowenc/valueside/array.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,8 @@ func DatumTypeToArrayElementEncodingType(t *types.T) (encoding.Type, error) {
241241
return encoding.IPAddr, nil
242242
case types.JsonFamily:
243243
return encoding.JSON, nil
244+
case types.LTreeFamily:
245+
return encoding.LTree, nil
244246
case types.TupleFamily:
245247
return encoding.Tuple, nil
246248
case types.ArrayFamily:
@@ -362,6 +364,8 @@ func encodeArrayElement(b []byte, d tree.Datum) ([]byte, error) {
362364
return nil, err
363365
}
364366
return encoding.EncodeUntaggedBytesValue(b, encoded), nil
367+
case *tree.DLTree:
368+
return encoding.EncodeUntaggedLTreeValue(b, t.LTree), nil
365369
default:
366370
return nil, errors.Errorf("don't know how to encode %s (%T)", d, d)
367371
}

pkg/sql/rowenc/valueside/decode.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,9 @@ func DecodeUntaggedDatum(
244244
// the loss of variable length encoding.
245245
b, data, err := encoding.DecodeUntaggedIntValue(buf)
246246
return a.NewDOid(tree.MakeDOid(oid.Oid(data), t)), b, err
247+
case types.LTreeFamily:
248+
b, l, err := encoding.DecodeUntaggedLTreeValue(buf)
249+
return tree.NewDLTree(l), b, err
247250
case types.ArrayFamily:
248251
// Skip the encoded data length.
249252
b, _, _, err := encoding.DecodeNonsortingUvarint(buf)

pkg/sql/rowenc/valueside/encode.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ func EncodeWithScratch(
125125
return encoding.EncodeBytesValue(appendTo, uint32(colID), t.UnsafeContentBytes()), scratch, nil
126126
case *tree.DOid:
127127
return encoding.EncodeIntValue(appendTo, uint32(colID), int64(t.Oid)), scratch, nil
128+
case *tree.DLTree:
129+
return encoding.EncodeLTreeValue(appendTo, uint32(colID), t.LTree), scratch, nil
128130
case *tree.DEnum:
129131
return encoding.EncodeBytesValue(appendTo, uint32(colID), t.PhysicalRep), scratch, nil
130132
case *tree.DVoid:

pkg/sql/rowenc/valueside/legacy.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/cockroachdb/cockroach/pkg/sql/pgrepl/lsn"
1414
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
1515
"github.com/cockroachdb/cockroach/pkg/sql/types"
16+
"github.com/cockroachdb/cockroach/pkg/util/encoding"
1617
"github.com/cockroachdb/cockroach/pkg/util/ipaddr"
1718
"github.com/cockroachdb/cockroach/pkg/util/json"
1819
"github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate"
@@ -174,6 +175,12 @@ func MarshalLegacy(colType *types.T, val tree.Datum) (roachpb.Value, error) {
174175
r.SetBytes(data)
175176
return r, nil
176177
}
178+
case types.LTreeFamily:
179+
if v, ok := val.(*tree.DLTree); ok {
180+
data := encoding.EncodeUntaggedLTreeValue(nil, v.LTree)
181+
r.SetBytes(data)
182+
return r, nil
183+
}
177184
case types.ArrayFamily:
178185
if v, ok := val.(*tree.DArray); ok {
179186
if err := checkElementType(v.ParamTyp, colType.ArrayContents()); err != nil {
@@ -389,6 +396,16 @@ func UnmarshalLegacy(a *tree.DatumAlloc, typ *types.T, value roachpb.Value) (tre
389396
return nil, err
390397
}
391398
return a.NewDOid(tree.MakeDOid(oid.Oid(v), typ)), nil
399+
case types.LTreeFamily:
400+
v, err := value.GetBytes()
401+
if err != nil {
402+
return nil, err
403+
}
404+
_, l, err := encoding.DecodeUntaggedLTreeValue(v)
405+
if err != nil {
406+
return nil, err
407+
}
408+
return tree.NewDLTree(l), nil
392409
case types.ArrayFamily:
393410
v, err := value.GetBytes()
394411
if err != nil {

pkg/util/encoding/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ go_library(
1818
"//pkg/util/duration",
1919
"//pkg/util/encoding/encodingtype",
2020
"//pkg/util/ipaddr",
21+
"//pkg/util/ltree",
2122
"//pkg/util/protoutil",
2223
"//pkg/util/timeofday",
2324
"//pkg/util/timetz",

pkg/util/encoding/encoding.go

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/cockroachdb/cockroach/pkg/util/duration"
2626
"github.com/cockroachdb/cockroach/pkg/util/encoding/encodingtype"
2727
"github.com/cockroachdb/cockroach/pkg/util/ipaddr"
28+
"github.com/cockroachdb/cockroach/pkg/util/ltree"
2829
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
2930
"github.com/cockroachdb/cockroach/pkg/util/timeofday"
3031
"github.com/cockroachdb/cockroach/pkg/util/timetz"
@@ -1786,6 +1787,7 @@ const (
17861787
JsonEmptyArray Type = 42
17871788
JsonEmptyArrayDesc Type = 43
17881789
PGVector Type = 44
1790+
LTree Type = 45
17891791
)
17901792

17911793
// typMap maps an encoded type byte to a decoded Type. It's got 256 slots, one
@@ -2840,6 +2842,22 @@ func EncodePGVectorValue(appendTo []byte, colIDDelta uint32, data []byte) []byte
28402842
return EncodeUntaggedBytesValue(appendTo, data)
28412843
}
28422844

2845+
// EncodeLTreeValue encodes an ltree.T value with its value tag, appends it to
2846+
// the supplied buffer, and returns the final buffer.
//
// The value tag is written first by EncodeValueTag, using colIDDelta and the
// LTree type; the payload that follows it is written by
// EncodeUntaggedLTreeValue.
2847+
func EncodeLTreeValue(appendTo []byte, colIDDelta uint32, l ltree.T) []byte {
2848+
appendTo = EncodeValueTag(appendTo, colIDDelta, LTree)
2849+
return EncodeUntaggedLTreeValue(appendTo, l)
2850+
}
2851+
2852+
// EncodeUntaggedLTreeValue encodes an ltree.T value, appends it to the supplied
2853+
// buffer, and returns the final buffer.
//
// The path is first rendered into a temporary buffer with FormatToBuffer; the
// resulting bytes are then appended with EncodeUntaggedBytesValue, so the
// payload uses the same encoding as an untagged bytes value.
//
// NOTE(review): the temporary bytes.Buffer is created on every call; confirm
// whether that matters for callers on a hot path.
2854+
func EncodeUntaggedLTreeValue(appendTo []byte, l ltree.T) []byte {
2855+
var buf bytes.Buffer
2856+
l.FormatToBuffer(&buf)
2857+
appendTo = EncodeUntaggedBytesValue(appendTo, buf.Bytes())
2858+
return appendTo
2859+
}
2860+
28432861
// DecodeValueTag decodes a value encoded by EncodeValueTag, used as a prefix in
28442862
// each of the other EncodeFooValue methods.
28452863
//
@@ -3189,6 +3207,28 @@ func DecodeUntaggedIPAddrValue(b []byte) (remaining []byte, u ipaddr.IPAddr, err
31893207
return remaining, u, err
31903208
}
31913209

3210+
// DecodeLTreeValue decodes a value encoded by EncodeLTreeValue.
//
// It first checks, via decodeValueTypeAssert, that the value tag carries the
// LTree type. If that check fails, its error is returned together with the
// slice it produced and a zero-valued ltree.T. Otherwise the rest of the
// buffer is handed to DecodeUntaggedLTreeValue.
3211+
func DecodeLTreeValue(b []byte) (remaining []byte, l ltree.T, err error) {
3212+
b, err = decodeValueTypeAssert(b, LTree)
3213+
if err != nil {
3214+
return b, l, err
3215+
}
3216+
return DecodeUntaggedLTreeValue(b)
3217+
}
3218+
3219+
// DecodeUntaggedLTreeValue decodes a value encoded by EncodeUntaggedLTreeValue.
//
// The payload is read with DecodeUntaggedBytesValue and the decoded bytes are
// parsed as text by ltree.ParseLTree. On success it returns the bytes that
// follow the payload along with the parsed path. If either step fails, the
// error is returned together with the original, unconsumed input b.
3220+
func DecodeUntaggedLTreeValue(b []byte) (remaining []byte, l ltree.T, err error) {
3221+
remaining, data, err := DecodeUntaggedBytesValue(b)
3222+
if err != nil {
3223+
return b, l, err
3224+
}
3225+
l, err = ltree.ParseLTree(string(data))
3226+
if err != nil {
3227+
return b, l, err
3228+
}
3229+
return remaining, l, nil
3230+
}
3231+
31923232
func decodeValueTypeAssert(b []byte, expected Type) ([]byte, error) {
31933233
_, dataOffset, _, typ, err := DecodeValueTag(b)
31943234
if err != nil {
@@ -3243,7 +3283,7 @@ func PeekValueLengthWithOffsetsAndType(b []byte, dataOffset int, typ Type) (leng
32433283
return dataOffset + n, err
32443284
case Float:
32453285
return dataOffset + floatValueEncodedLength, nil
3246-
case Bytes, Array, JSON, Geo, TSVector, TSQuery, PGVector:
3286+
case Bytes, Array, JSON, Geo, TSVector, TSQuery, PGVector, LTree:
32473287
_, n, i, err := DecodeNonsortingUvarint(b)
32483288
return dataOffset + n + int(i), err
32493289
case Box2D:

0 commit comments

Comments
 (0)