Skip to content

Commit 7cff4e8

Browse files
committed
jsonpath: add like_regex support
This commit adds `like_regex` predicate evaluation support. Flags for `like_regex` are not supported yet. Epic: None Release note (sql change): Add `like_regex` predicate evaluation support for jsonpath queries. Flags for `like_regex` are not supported yet.
1 parent a5271a0 commit 7cff4e8

File tree

11 files changed

+240
-25
lines changed

11 files changed

+240
-25
lines changed

pkg/sql/logictest/testdata/logic_test/jsonb_path_query

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# LogicTest: !local-mixed-24.3 !local-mixed-25.1
22

3+
query T
4+
SELECT jsonb_path_query('"\\"', '$ ? (@ like_regex "^\\\\$")');
5+
----
6+
"\\"
7+
38
query T
49
SELECT jsonb_path_query('{}', '$')
510
----
@@ -808,6 +813,107 @@ false
808813
statement error pgcode 22038 pq: left operand of jsonpath operator \% is not a single numeric value
809814
SELECT jsonb_path_query('{}', 'null % 1');
810815

816+
query T
817+
SELECT jsonb_path_query('{}', 'null like_regex "^he.*$"');
818+
----
819+
null
820+
821+
query T
822+
SELECT jsonb_path_query('{}', '"hello" like_regex "^he.*$"');
823+
----
824+
true
825+
826+
query T
827+
SELECT jsonb_path_query('{}', '"ahello" like_regex "^he.*$"');
828+
----
829+
false
830+
831+
query T
832+
SELECT jsonb_path_query('{"a": "e"}', '$.a ? (@ like_regex "^[aeiou]")');
833+
----
834+
"e"
835+
836+
query T
837+
SELECT jsonb_path_query('{"a": {"b": "e"}}', '$.a ? (@.b like_regex "^[aeiou]")');
838+
----
839+
{"b": "e"}
840+
841+
query empty
842+
SELECT jsonb_path_query('{"a": {"b": "r"}}', '$.a ? (@.b like_regex "^[aeiou]")');
843+
844+
query T rowsort
845+
SELECT jsonb_path_query('["apple", "banana", "orange", "umbrella", "grape"]', 'strict $[*] ? (@ like_regex "^[aeiou]")');
846+
----
847+
"apple"
848+
"orange"
849+
"umbrella"
850+
851+
query T rowsort
852+
SELECT jsonb_path_query('[{"balance": "987_650", "name": "a"}, {"balance": "987_424", "name": "b"}, {"balance": "100", "name": "c"}]', '$[*] ? (@.balance like_regex "987_.*").balance');
853+
----
854+
"987_650"
855+
"987_424"
856+
857+
query T
858+
SELECT jsonb_path_query('{"ab\\c": "hello"}', '$."ab\\c"');
859+
----
860+
"hello"
861+
862+
query empty
863+
SELECT jsonb_path_query('"a\nb"', '$ ? (@ like_regex "^.*$")');
864+
865+
query T
866+
SELECT jsonb_path_query('"\\"', '$ ? (@ like_regex "^\\\\$")');
867+
----
868+
"\\"
869+
870+
query T
871+
SELECT jsonb_path_query('"\\\\"', '$ ? (@ like_regex "^\\\\\\\\$")');
872+
----
873+
"\\\\"
874+
875+
query T
876+
SELECT jsonb_path_query('{"paths": ["C:\\Program Files", "D:\\Data"]}', '$.paths[*] ? (@ like_regex "^[A-Z]:\\\\[A-Za-z]+$")');
877+
----
878+
"D:\\Data"
879+
880+
query T rowsort
881+
SELECT jsonb_path_query('{"paths": ["C:\\Program Files (x86)\\", "D:\\My Documents\\", "E:\\Test!@#$"]}', '$.paths[*] ? (@ like_regex "^[A-Z]:\\\\.*\\\\$")');
882+
----
883+
"C:\\Program Files (x86)\\"
884+
"D:\\My Documents\\"
885+
886+
query T rowsort
887+
SELECT jsonb_path_query('{"urls": ["http:\/\/example.com", "https:\/\/test.com\/path"]}', '$.urls[*] ? (@ like_regex "^https?:\/\/.*\.com")');
888+
----
889+
"http://example.com"
890+
"https://test.com/path"
891+
892+
query T rowsort
893+
SELECT jsonb_path_query('{"mixed": ["C:/path\\to/file", "D:\\path/to\\file"]}', '$.mixed[*] ? (@ like_regex "^[A-Z]:[/\\\\].*")');
894+
----
895+
"C:/path\\to/file"
896+
"D:\\path/to\\file"
897+
898+
query T rowsort
899+
SELECT jsonb_path_query('["a+b", "a*b", "a?b", "a.b", "a[b]", "a{b}"]', '$[*] ? (@ like_regex "^a[\\+\\*\\?\\.]b$|^a\\[b\\]$|^a\\{b\\}$")');
900+
----
901+
"a+b"
902+
"a*b"
903+
"a?b"
904+
"a.b"
905+
"a[b]"
906+
"a{b}"
907+
908+
query T rowsort
909+
SELECT jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'lax $[*] ? (@ like_regex "^ab.*c")');
910+
----
911+
"abc"
912+
"abdacb"
913+
914+
# TODO(normanchenn): support scanning identQuote within regex.
915+
# SELECT jsonb_path_query('"He said \"Hello\\World!\""', '$ ? (@ like_regex ".*\"H.*\\\\.*!.*\".*")');
916+
811917
# select jsonb_path_query('[1, 2, 3, 4, 5]', '$[-1]');
812918
# select jsonb_path_query('[1, 2, 3, 4, 5]', 'strict $[-1]');
813919

pkg/sql/scanner/jsonpath_scan.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ func (s *JSONPathScanner) Scan(lval ScanSymType) {
2727
switch ch {
2828
case '$':
2929
// Root path ($)
30-
if s.peek() == '.' || s.peek() == eof || s.peek() == ' ' || s.peek() == '[' || s.peek() == ')' {
30+
if s.peek() == '.' || s.peek() == eof || s.peek() == ' ' || s.peek() == '[' || s.peek() == ')' || s.peek() == '?' {
3131
lval.SetID(lexbase.ROOT)
3232
return
3333
}
@@ -46,7 +46,17 @@ func (s *JSONPathScanner) Scan(lval ScanSymType) {
4646
return
4747
case identQuote:
4848
// "[^"]"
49-
if s.scanString(lval, identQuote, false /* allowEscapes */, true /* requireUTF8 */) {
49+
// When scanning string literals for like_regex patterns, we need to
50+
// consider how to handle escape characters similarly to Postgres.
51+
// See: https://www.postgresql.org/docs/current/functions-json.html#JSONPATH-REGULAR-EXPRESSIONS,
52+
// "any backslashes you want to use in the regular expression must be doubled".
53+
//
54+
// With allowEscapes == true,
55+
// - String literal input "^\\$" is scanned as "^\\$" (one escaped backslash)
56+
// - This matches the behaviour of Postgres.
57+
// With allowEscapes == false,
58+
// - String literal input "^\\$" is scanned as "^\\\\$" (two escaped backslashes)
59+
if s.scanString(lval, identQuote, true /* allowEscapes */, true /* requireUTF8 */) {
5060
lval.SetID(lexbase.STRING)
5161
}
5262
return

pkg/sql/sem/builtins/builtins.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12180,7 +12180,9 @@ func makeTimestampStatementBuiltinOverload(withOutputTZ bool, withInputTZ bool)
1218012180
}
1218112181
}
1218212182

12183-
func makeJsonpathExists(_ context.Context, _ *eval.Context, args tree.Datums) (tree.Datum, error) {
12183+
func makeJsonpathExists(
12184+
_ context.Context, evalCtx *eval.Context, args tree.Datums,
12185+
) (tree.Datum, error) {
1218412186
target := tree.MustBeDJSON(args[0])
1218512187
path := tree.MustBeDJsonpath(args[1])
1218612188
vars := tree.EmptyDJSON
@@ -12191,7 +12193,7 @@ func makeJsonpathExists(_ context.Context, _ *eval.Context, args tree.Datums) (t
1219112193
if len(args) > 3 {
1219212194
silent = tree.MustBeDBool(args[3])
1219312195
}
12194-
exists, err := jsonpath.JsonpathExists(target, path, vars, silent)
12196+
exists, err := jsonpath.JsonpathExists(evalCtx, target, path, vars, silent)
1219512197
if err != nil {
1219612198
return nil, err
1219712199
}

pkg/sql/sem/builtins/generator_builtins.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1624,6 +1624,8 @@ var jsonObjectKeysImpl = makeGeneratorOverload(
16241624
var jsonPathQueryGeneratorType = types.Jsonb
16251625

16261626
type jsonPathQueryGenerator struct {
1627+
evalCtx *eval.Context
1628+
16271629
target tree.DJSON
16281630
path tree.DJsonpath
16291631
vars tree.DJSON
@@ -1634,7 +1636,7 @@ type jsonPathQueryGenerator struct {
16341636
}
16351637

16361638
func makeJsonpathQueryGenerator(
1637-
_ context.Context, _ *eval.Context, args tree.Datums,
1639+
_ context.Context, evalCtx *eval.Context, args tree.Datums,
16381640
) (eval.ValueGenerator, error) {
16391641
target := tree.MustBeDJSON(args[0])
16401642
path := tree.MustBeDJsonpath(args[1])
@@ -1650,10 +1652,11 @@ func makeJsonpathQueryGenerator(
16501652
silent = tree.MustBeDBool(args[3])
16511653
}
16521654
return &jsonPathQueryGenerator{
1653-
target: target,
1654-
path: path,
1655-
vars: vars,
1656-
silent: silent,
1655+
evalCtx: evalCtx,
1656+
target: target,
1657+
path: path,
1658+
vars: vars,
1659+
silent: silent,
16571660
}, nil
16581661
}
16591662

@@ -1664,7 +1667,7 @@ func (g *jsonPathQueryGenerator) ResolvedType() *types.T {
16641667

16651668
// Start implements the eval.ValueGenerator interface.
16661669
func (g *jsonPathQueryGenerator) Start(_ context.Context, _ *kv.Txn) error {
1667-
jsonb, err := jsonpath.JsonpathQuery(g.target, g.path, g.vars, g.silent)
1670+
jsonb, err := jsonpath.JsonpathQuery(g.evalCtx, g.target, g.path, g.vars, g.silent)
16681671
if err != nil {
16691672
return err
16701673
}

pkg/util/jsonpath/eval/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ go_library(
1515
deps = [
1616
"//pkg/sql/pgwire/pgcode",
1717
"//pkg/sql/pgwire/pgerror",
18+
"//pkg/sql/sem/eval",
1819
"//pkg/sql/sem/tree",
1920
"//pkg/util/errorutil/unimplemented",
2021
"//pkg/util/json",

pkg/util/jsonpath/eval/eval.go

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package eval
77

88
import (
9+
"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
910
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
1011
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
1112
"github.com/cockroachdb/cockroach/pkg/util/json"
@@ -14,10 +15,14 @@ import (
1415
"github.com/cockroachdb/errors"
1516
)
1617

17-
var errUnimplemented = unimplemented.NewWithIssue(22513, "unimplemented")
18-
var errInternal = errors.New("internal error")
18+
var (
19+
errUnimplemented = unimplemented.NewWithIssue(22513, "unimplemented")
20+
errInternal = errors.New("internal error")
21+
)
1922

2023
type jsonpathCtx struct {
24+
evalCtx *eval.Context
25+
2126
// Root of the given JSON object ($). We store this because we will need to
2227
// support queries with multiple root elements (ex. $.a ? ($.b == "hello").
2328
root json.JSON
@@ -26,7 +31,7 @@ type jsonpathCtx struct {
2631
}
2732

2833
func JsonpathQuery(
29-
target tree.DJSON, path tree.DJsonpath, vars tree.DJSON, silent tree.DBool,
34+
evalCtx *eval.Context, target tree.DJSON, path tree.DJsonpath, vars tree.DJSON, silent tree.DBool,
3035
) ([]tree.DJSON, error) {
3136
parsedPath, err := parser.Parse(string(path))
3237
if err != nil {
@@ -35,9 +40,10 @@ func JsonpathQuery(
3540
expr := parsedPath.AST
3641

3742
ctx := &jsonpathCtx{
38-
root: target.JSON,
39-
vars: vars.JSON,
40-
strict: expr.Strict,
43+
evalCtx: evalCtx,
44+
root: target.JSON,
45+
vars: vars.JSON,
46+
strict: expr.Strict,
4147
}
4248
// When silent is true, overwrite the strict mode.
4349
if bool(silent) {
@@ -56,9 +62,9 @@ func JsonpathQuery(
5662
}
5763

5864
func JsonpathExists(
59-
target tree.DJSON, path tree.DJsonpath, vars tree.DJSON, silent tree.DBool,
65+
evalCtx *eval.Context, target tree.DJSON, path tree.DJsonpath, vars tree.DJSON, silent tree.DBool,
6066
) (tree.DBool, error) {
61-
j, err := JsonpathQuery(target, path, vars, silent)
67+
j, err := JsonpathQuery(evalCtx, target, path, vars, silent)
6268
if err != nil {
6369
return false, err
6470
}

pkg/util/jsonpath/eval/operation.go

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -69,19 +69,66 @@ func (ctx *jsonpathCtx) evalOperation(
6969
case jsonpath.OpCompEqual, jsonpath.OpCompNotEqual,
7070
jsonpath.OpCompLess, jsonpath.OpCompLessEqual,
7171
jsonpath.OpCompGreater, jsonpath.OpCompGreaterEqual:
72-
res, err := ctx.evalComparison(op, jsonValue, true /* unwrapRight */)
72+
res, err := ctx.evalComparison(op, jsonValue)
7373
if err != nil {
7474
return convertFromBool(jsonpathBoolUnknown), err
7575
}
7676
return convertFromBool(res), nil
7777
case jsonpath.OpAdd, jsonpath.OpSub, jsonpath.OpMult,
7878
jsonpath.OpDiv, jsonpath.OpMod:
7979
return ctx.evalArithmetic(op, jsonValue)
80+
case jsonpath.OpLikeRegex:
81+
res, err := ctx.evalRegex(op, jsonValue)
82+
if err != nil {
83+
return convertFromBool(jsonpathBoolUnknown), err
84+
}
85+
return convertFromBool(res), nil
8086
default:
8187
panic(errors.AssertionFailedf("unhandled operation type"))
8288
}
8389
}
8490

91+
func (ctx *jsonpathCtx) evalRegex(
92+
op jsonpath.Operation, jsonValue json.JSON,
93+
) (jsonpathBool, error) {
94+
l, err := ctx.evalAndUnwrapResult(op.Left, jsonValue, true /* unwrap */)
95+
if err != nil {
96+
return jsonpathBoolUnknown, err
97+
}
98+
if len(l) != 1 {
99+
return jsonpathBoolUnknown, errors.AssertionFailedf("left is not a single string")
100+
}
101+
if l[0].Type() != json.StringJSONType {
102+
return jsonpathBoolUnknown, nil
103+
}
104+
// AsText() provides the correct string representation for regex pattern
105+
// matching by returning raw characters instead of their escaped JSON string
106+
// representations.
107+
//
108+
// Examples:
109+
// - For a JSON string holding one backslash ("\\"): AsText() returns that
110+
// single raw backslash, while String() returns "\"\\\\\"" (the JSON-escaped
111+
// form: two backslash characters enclosed in quotes).
112+
// - For a JSON string with a newline ("\n"): AsText() returns an actual
113+
// newline character ("\n"), while String() returns "\"\\n\"" (an escaped
114+
// backslash and 'n' enclosed in quotes).
115+
text, err := l[0].AsText()
116+
if err != nil {
117+
return jsonpathBoolUnknown, err
118+
}
119+
120+
regexOp := op.Right.(jsonpath.Regex)
121+
r, err := ctx.evalCtx.ReCache.GetRegexp(regexOp)
122+
if err != nil {
123+
return jsonpathBoolUnknown, err
124+
}
125+
res := r.MatchString(*text)
126+
if !res {
127+
return jsonpathBoolFalse, nil
128+
}
129+
return jsonpathBoolTrue, nil
130+
}
131+
85132
func (ctx *jsonpathCtx) evalLogical(
86133
op jsonpath.Operation, current json.JSON,
87134
) (jsonpathBool, error) {
@@ -143,17 +190,14 @@ func (ctx *jsonpathCtx) evalLogical(
143190
// right paths satisfy the condition. In strict mode, even if a pair has been
144191
// found, all pairs need to be checked for errors.
145192
func (ctx *jsonpathCtx) evalComparison(
146-
op jsonpath.Operation, jsonValue json.JSON, unwrapRight bool,
193+
op jsonpath.Operation, jsonValue json.JSON,
147194
) (jsonpathBool, error) {
148-
// The left argument results are always auto-unwrapped.
195+
// The left and right argument results are always auto-unwrapped.
149196
left, err := ctx.evalAndUnwrapResult(op.Left, jsonValue, true /* unwrap */)
150197
if err != nil {
151198
return jsonpathBoolUnknown, err
152199
}
153-
// The right argument results are conditionally unwrapped. Currently, it is
154-
// always unwrapped, but in the future for operations like like_regex, we
155-
// don't want to unwrap the right argument.
156-
right, err := ctx.evalAndUnwrapResult(op.Right, jsonValue, unwrapRight)
200+
right, err := ctx.evalAndUnwrapResult(op.Right, jsonValue, true /* unwrap */)
157201
if err != nil {
158202
return jsonpathBoolUnknown, err
159203
}

pkg/util/jsonpath/expr.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,24 @@ type Current struct{}
108108
var _ Path = Current{}
109109

110110
func (c Current) String() string { return "@" }
111+
112+
type Regex struct {
113+
Regex string
114+
// Flags are currently not used.
115+
Flags string
116+
}
117+
118+
var _ Path = Regex{}
119+
120+
func (r Regex) String() string {
121+
if r.Flags == "" {
122+
return fmt.Sprintf("%q", r.Regex)
123+
}
124+
return fmt.Sprintf("%q flag %q", r.Regex, r.Flags)
125+
}
126+
127+
var _ tree.RegexpCacheKey = Regex{}
128+
129+
func (r Regex) Pattern() (string, error) {
130+
return r.Regex, nil
131+
}

0 commit comments

Comments
 (0)