Skip to content

Commit d2ba9a9

Browse files
craig[bot]paulniziolek
andcommitted
Merge #149912
149912: sql: add LTREE util package r=paulniziolek a=paulniziolek #### sql: add ltree util pkg with basic functionality The newly introduced ltree pkg introduces the basic underlying LTREE internals for when the LTREE type is to be introduced into CRDB, such as the data structure, ltree parsing and validation, and accessory methods. Informs: #44657 Epic: CRDB-148 Release note: None Co-authored-by: Paul Niziolek <[email protected]>
2 parents 23e1618 + d4ed75f commit d2ba9a9

File tree

5 files changed

+499
-0
lines changed

5 files changed

+499
-0
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,7 @@
618618
/pkg/util/intsets/ @cockroachdb/sql-queries-prs
619619
/pkg/util/json/ @cockroachdb/sql-queries-prs
620620
/pkg/util/jsonpath/ @cockroachdb/sql-queries-prs
621+
/pkg/util/ltree/ @cockroachdb/sql-queries-prs
621622
/pkg/util/log/ @cockroachdb/obs-prs @cockroachdb/obs-india-prs
622623
/pkg/util/metric/ @cockroachdb/obs-prs @cockroachdb/obs-india-prs
623624
/pkg/util/mon @cockroachdb/sql-queries-prs

pkg/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,7 @@ ALL_TESTS = [
767767
"//pkg/util/log/logmetrics:logmetrics_test",
768768
"//pkg/util/log/testshout:testshout_test",
769769
"//pkg/util/log:log_test",
770+
"//pkg/util/ltree:ltree_test",
770771
"//pkg/util/metamorphic:metamorphic_test",
771772
"//pkg/util/metric/aggmetric:aggmetric_test",
772773
"//pkg/util/metric:metric_test",
@@ -2671,6 +2672,8 @@ GO_TARGETS = [
26712672
"//pkg/util/log/testshout:testshout_test",
26722673
"//pkg/util/log:log",
26732674
"//pkg/util/log:log_test",
2675+
"//pkg/util/ltree:ltree",
2676+
"//pkg/util/ltree:ltree_test",
26742677
"//pkg/util/memzipper:memzipper",
26752678
"//pkg/util/metamorphic/metamorphicutil:metamorphicutil",
26762679
"//pkg/util/metamorphic:metamorphic",

pkg/util/ltree/BUILD.bazel

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
2+
3+
# Go library target for the ltree package, compiled from ltree.go and visible
# to every package in the workspace.
go_library(
4+
name = "ltree",
5+
srcs = ["ltree.go"],
6+
importpath = "github.com/cockroachdb/cockroach/pkg/util/ltree",
7+
visibility = ["//visibility:public"],
8+
deps = [
9+
"//pkg/sql/pgwire/pgcode",
10+
"//pkg/sql/pgwire/pgerror",
11+
"@com_github_cockroachdb_errors//:errors",
12+
],
13+
)
14+
15+
# Test target built from ltree_test.go; it embeds the :ltree library target.
go_test(
16+
name = "ltree_test",
17+
srcs = ["ltree_test.go"],
18+
embed = [":ltree"],
19+
)

pkg/util/ltree/ltree.go

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package ltree
7+
8+
import (
9+
"bytes"
10+
"strings"
11+
12+
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
13+
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
14+
"github.com/cockroachdb/errors"
15+
)
16+
17+
const (
	// PathSeparator is the delimiter written between consecutive labels in the
	// textual form of an LTREE path.
	PathSeparator = "."
	// maxNumOfLabels caps how many labels one path may hold; Postgres enforces
	// the same limit of 65535 labels per ltree.
	maxNumOfLabels = 65535
	// maxLabelLength caps the length of a single label. The Postgres docs say
	// labels must be shorter than 256 bytes, but in practice Postgres accepts
	// labels of up to 1000 characters.
	maxLabelLength = 1000
)
25+
26+
var (
27+
// Empty represents the LTree path "".
28+
Empty = T{}
29+
errEmptyLabel = pgerror.New(pgcode.Syntax, "label cannot be empty")
30+
)
31+
32+
// T is an LTREE path: a sequence of labels ordered from first to last.
type T struct {
	// path holds the labels of the path in order, as found in an LTREE
	// column. It has no elements for the empty path.
	path []string
}
37+
38+
// ParseLTree parses a string representation of a path into a T struct.
39+
func ParseLTree(pathStr string) (T, error) {
40+
labels := strings.Split(pathStr, PathSeparator)
41+
if len(labels) > maxNumOfLabels {
42+
return T{}, pgerror.Newf(pgcode.ProgramLimitExceeded, "number of ltree labels (%d) exceeds the maximum allowed (%d)", len(labels), maxNumOfLabels)
43+
}
44+
for _, label := range labels {
45+
err := validateLabel(label)
46+
if err != nil {
47+
if errors.Is(err, errEmptyLabel) && len(labels) == 1 {
48+
// If the only label is empty, we treat it as a valid empty path.
49+
return Empty, nil
50+
}
51+
return Empty, err
52+
}
53+
}
54+
return T{path: labels}, nil
55+
}
56+
57+
// String returns the string representation of T.
58+
func (lt T) String() string {
59+
var b bytes.Buffer
60+
lt.FormatToBuffer(&b)
61+
return b.String()
62+
}
63+
64+
// FormatToBuffer formats the LTREE path into a bytes.Buffer,
65+
// using the PathSeparator.
66+
func (lt T) FormatToBuffer(buf *bytes.Buffer) {
67+
for i, l := range lt.path {
68+
if i > 0 {
69+
buf.WriteString(PathSeparator)
70+
}
71+
buf.WriteString(l)
72+
}
73+
}
74+
75+
// ByteSize returns the size of the T in bytes, which is the sum of the label
76+
// lengths and their path separators.
77+
func (lt T) ByteSize() int {
78+
size := 0
79+
for i, l := range lt.path {
80+
if i > 0 {
81+
size += len(PathSeparator)
82+
}
83+
size += len(l)
84+
}
85+
return size
86+
}
87+
88+
// ForEachLabel iterates over each label in the LTREE path,
89+
// calling the provided function with the index and label.
90+
func (lt T) ForEachLabel(fn func(int, string)) {
91+
for i, l := range lt.path {
92+
fn(i, l)
93+
}
94+
}
95+
96+
// LabelAt returns the label at the specified index in an LTree path.
97+
func (lt T) LabelAt(idx int) (string, error) {
98+
if idx < 0 || idx >= lt.Len() {
99+
return "", pgerror.Newf(pgcode.InvalidParameterValue, "index %d out of bounds", idx)
100+
}
101+
return lt.path[idx], nil
102+
}
103+
104+
// Compare compares two LTrees lexicographically based on their labels.
105+
func (lt T) Compare(other T) int {
106+
minLen := min(lt.Len(), other.Len())
107+
108+
for i := 0; i < minLen; i++ {
109+
if cmp := strings.Compare(lt.path[i], other.path[i]); cmp != 0 {
110+
return cmp
111+
}
112+
}
113+
114+
if lt.Len() < other.Len() {
115+
return -1
116+
} else if lt.Len() > other.Len() {
117+
return 1
118+
}
119+
return 0
120+
}
121+
122+
// Len returns the number of labels in the T.
123+
func (lt T) Len() int {
124+
return len(lt.path)
125+
}
126+
127+
// Copy creates a copy of T.
128+
func (lt T) Copy() T {
129+
copiedLabels := make([]string, lt.Len())
130+
copy(copiedLabels, lt.path)
131+
return T{path: copiedLabels}
132+
}
133+
134+
// Prev returns the lexicographically previous LTree and a bool
135+
// indicating whether it exists.
136+
func (lt T) Prev() (T, bool) {
137+
if lt.Len() == 0 {
138+
return Empty, false
139+
}
140+
141+
lastLabel := lt.path[lt.Len()-1]
142+
if l := prevLabel(lastLabel); l != "" {
143+
result := lt.Copy()
144+
result.path[lt.Len()-1] = l
145+
return result, true
146+
}
147+
148+
if lt.Len() > 1 {
149+
return T{path: lt.path[:lt.Len()-1]}, true
150+
}
151+
152+
return Empty, true
153+
}
154+
155+
// validateLabel checks if a label is valid and returns an error if it is not,
156+
// otherwise, it returns nil.
157+
// A label is valid if it:
158+
// - is not empty
159+
// - does not exceed the maximum length
160+
// - contains only valid characters: '-', '0'-'9', 'A'-'Z', '_', 'a'-'z'
161+
func validateLabel(l string) error {
162+
if len(l) > maxLabelLength {
163+
return pgerror.Newf(pgcode.NameTooLong, "label length is %d, must be at most %d", len(l), maxLabelLength)
164+
}
165+
if l == "" {
166+
return errEmptyLabel
167+
}
168+
for _, c := range l {
169+
if !isValidChar(byte(c)) {
170+
return pgerror.Newf(pgcode.Syntax, "label contains invalid character %c", c)
171+
}
172+
}
173+
return nil
174+
}
175+
176+
// prevLabel returns the lexicographically previous label or empty string if
177+
// none exists.
178+
func prevLabel(s string) string {
179+
if len(s) == 0 {
180+
return ""
181+
}
182+
183+
lastChar := s[len(s)-1]
184+
if prev := prevChar(lastChar); prev != 0 {
185+
return s[:len(s)-1] + string(prev)
186+
}
187+
188+
if len(s) > 1 {
189+
return s[:len(s)-1]
190+
}
191+
192+
return ""
193+
}
194+
195+
// isValidChar reports whether c may appear in an LTree label: an ASCII digit,
// an ASCII letter of either case, '_' or '-'.
func isValidChar(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	case 'a' <= c && c <= 'z':
		return true
	default:
		return c == '_' || c == '-'
	}
}
199+
200+
// prevCharMap lists the label characters whose predecessor is not simply the
// preceding byte value. A value of 0 marks a character with no predecessor.
var prevCharMap = map[byte]byte{
	'-': 0,
	'0': '-',
	'A': '9',
	'_': 'Z',
	'a': '_',
}

// prevChar returns the valid character that comes before c, or 0 when none
// exists. c is assumed to be a valid label character: characters listed in
// prevCharMap use the mapped value, and every other input yields c-1.
func prevChar(c byte) byte {
	prev, special := prevCharMap[c]
	if !special {
		prev = c - 1
	}
	return prev
}

0 commit comments

Comments
 (0)