Skip to content

Commit c96c5a4

Browse files
normanchenn authored and yuzefovich committed
jsonpath: add support for like_regex flags
This commit adds support for regex flags for JSONPath's `like_regex` predicate. The supported flags are:
- 'i': Case-insensitive matching
- 's': Dot matches newline
- 'm': Multiline mode (`^` and `$` match at newlines)
- 'x': Ignore whitespace mode (not implemented in Postgres; we return the same error)
- 'q': No special characters (treat the pattern as a literal)

Release note (sql change): Add support for `like_regex` flags in JSONPath queries. For example, `SELECT jsonb_path_query('{}', '"a" like_regex ".*" flag "i"');`.
1 parent b295a06 commit c96c5a4

File tree

9 files changed

+252
-16
lines changed

9 files changed

+252
-16
lines changed

pkg/sql/logictest/testdata/logic_test/jsonb_path_query

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1812,3 +1812,91 @@ SELECT jsonb_path_query('"1"', '$.abs()');
18121812

18131813
statement error pgcode 22036 pq: jsonpath item method .floor\(\) can only be applied to a numeric value
18141814
SELECT jsonb_path_query('{}', '(null).floor()');
1815+
1816+
query T
1817+
SELECT jsonb_path_query('"Hello"', '$ like_regex "hello" flag "i"');
1818+
----
1819+
true
1820+
1821+
query T
1822+
SELECT jsonb_path_query('"HELLO"', '$ like_regex "hello" flag "i"');
1823+
----
1824+
true
1825+
1826+
# Use the same pattern but different flags to ensure that ReCache recognizes
1827+
# the change of flags.
1828+
query T
1829+
SELECT jsonb_path_query('"HELLO"', '$ like_regex "hello" flag ""');
1830+
----
1831+
false
1832+
1833+
query T
1834+
SELECT jsonb_path_query('"Hello\nWorld"', '$ like_regex "Hello.World" flag "s"');
1835+
----
1836+
true
1837+
1838+
query T
1839+
SELECT jsonb_path_query('"Hello\nWorld"', '$ like_regex "Hello.World" flag ""');
1840+
----
1841+
false
1842+
1843+
query T
1844+
SELECT jsonb_path_query('"Line1\nLine2"', '$ like_regex "^Line2$" flag "m"');
1845+
----
1846+
true
1847+
1848+
query T
1849+
SELECT jsonb_path_query('"Line1\nLine2"', '$ like_regex "^Line2$" flag ""');
1850+
----
1851+
false
1852+
1853+
query T
1854+
SELECT jsonb_path_query('"Hello123World"', '$ like_regex "Hello.*World" flag "q"');
1855+
----
1856+
false
1857+
1858+
query T
1859+
SELECT jsonb_path_query('"Hello123World"', '$ like_regex "Hello.*World" flag ""');
1860+
----
1861+
true
1862+
1863+
# Case insensitive and dot matches newline
1864+
query T
1865+
SELECT jsonb_path_query('"Hello\nWorld"', '$ like_regex "hello.world" flag "is"');
1866+
----
1867+
true
1868+
1869+
# Case insensitive and multiline
1870+
query T
1871+
SELECT jsonb_path_query('"Line1\nline2"', '$ like_regex "^LINE2$" flag "im"');
1872+
----
1873+
true
1874+
1875+
# Case insensitive and literal matching
1876+
query T
1877+
SELECT jsonb_path_query('"Hello123World"', '$ like_regex "HELLO.*WORLD" flag "iq"');
1878+
----
1879+
false
1880+
1881+
# Dot matches newline and multiline
1882+
query T
1883+
SELECT jsonb_path_query('"Line1\nLine2\nLine3"', '$ like_regex "^Line1.Line2.Line3$" flag "ms"');
1884+
----
1885+
true
1886+
1887+
query T
1888+
SELECT jsonb_path_query('"Line1\nLine2\nLine3"', '$ like_regex "^Line1.Line2.Line3$" flag "m"');
1889+
----
1890+
false
1891+
1892+
# Dot matches newline and literal matching
1893+
query T
1894+
SELECT jsonb_path_query('"Hello\nWorld"', '$ like_regex "Hello.World" flag "sq"');
1895+
----
1896+
false
1897+
1898+
# Multiline and literal matching
1899+
query T
1900+
SELECT jsonb_path_query('"Line1\nLine2"', '$ like_regex "^Line1\nLine2$" flag "mq"');
1901+
----
1902+
false

pkg/sql/logictest/testdata/logic_test/jsonpath

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,6 @@ SELECT 'last'::JSONPATH
159159
statement error pgcode 42601 pq: could not parse "@" as type jsonpath: @ is not allowed in root expressions
160160
SELECT '@'::JSONPATH
161161

162-
statement error unimplemented
163-
SELECT '$ ? (@ like_regex ".*" flag "i")'::JSONPATH;
164-
165162
statement error unimplemented
166163
SELECT '$.keyvalue()'::JSONPATH;
167164

pkg/sql/sem/builtins/builtins.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10780,7 +10780,9 @@ type regexpEscapeKey struct {
1078010780
sqlEscape string
1078110781
}
1078210782

10783-
// Pattern implements the RegexpCacheKey interface.
10783+
var _ tree.RegexpCacheKey = regexpEscapeKey{}
10784+
10785+
// Pattern implements the tree.RegexpCacheKey interface.
1078410786
func (k regexpEscapeKey) Pattern() (string, error) {
1078510787
pattern := k.sqlPattern
1078610788
if k.sqlEscape != `\` {
@@ -10812,7 +10814,9 @@ type regexpFlagKey struct {
1081210814
sqlFlags string
1081310815
}
1081410816

10815-
// Pattern implements the RegexpCacheKey interface.
10817+
var _ tree.RegexpCacheKey = regexpFlagKey{}
10818+
10819+
// Pattern implements the tree.RegexpCacheKey interface.
1081610820
func (k regexpFlagKey) Pattern() (string, error) {
1081710821
return regexpEvalFlags(k.sqlPattern, k.sqlFlags)
1081810822
}

pkg/sql/sem/eval/match.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,8 @@ type likeKey struct {
315315
escape rune
316316
}
317317

318+
var _ tree.RegexpCacheKey = likeKey{}
319+
318320
// LikeEscape converts a like pattern to a regexp pattern.
319321
func LikeEscape(pattern string) (string, error) {
320322
key := likeKey{s: pattern, caseInsensitive: false, escape: '\\'}
@@ -770,7 +772,8 @@ func (k likeKey) patternNoAnchor() (string, error) {
770772
return pattern, nil
771773
}
772774

773-
// Pattern implements the RegexpCacheKey interface.
775+
// Pattern implements the tree.RegexpCacheKey interface.
776+
//
774777
// The strategy for handling custom escape character
775778
// is to convert all unescaped escape character into '\'.
776779
// k.escape can either be empty or a single character.
@@ -787,7 +790,9 @@ type similarToKey struct {
787790
escape rune
788791
}
789792

790-
// Pattern implements the RegexpCacheKey interface.
793+
var _ tree.RegexpCacheKey = similarToKey{}
794+
795+
// Pattern implements the tree.RegexpCacheKey interface.
791796
func (k similarToKey) Pattern() (string, error) {
792797
pattern := similarEscapeCustomChar(k.s, k.escape, k.escape != 0)
793798
return anchorPattern(pattern, false), nil
@@ -834,7 +839,9 @@ type regexpKey struct {
834839
caseInsensitive bool
835840
}
836841

837-
// Pattern implements the RegexpCacheKey interface.
842+
var _ tree.RegexpCacheKey = regexpKey{}
843+
844+
// Pattern implements the tree.RegexpCacheKey interface.
838845
func (k regexpKey) Pattern() (string, error) {
839846
if k.caseInsensitive {
840847
return caseInsensitive(k.s), nil

pkg/sql/sem/tree/regexp_cache.go

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package tree
77

88
import (
99
"regexp"
10+
"regexp/syntax"
1011

1112
"github.com/cockroachdb/cockroach/pkg/util/cache"
1213
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
@@ -47,6 +48,22 @@ func NewRegexpCache(size int) *RegexpCache {
4748
// the given key, compiling the key's pattern if it is not already
4849
// in the cache.
4950
func (rc *RegexpCache) GetRegexp(key RegexpCacheKey) (*regexp.Regexp, error) {
51+
// syntax.Perl is the default flag for regexp.Compile.
52+
return rc.getRegexpInternal(key, syntax.Perl)
53+
}
54+
55+
// GetRegexpWithFlags consults the cache for the regular expressions stored for
56+
// the given key, compiling the key's pattern with the given flags if it is not
57+
// already in the cache.
58+
func (rc *RegexpCache) GetRegexpWithFlags(
59+
key RegexpCacheKey, flags syntax.Flags,
60+
) (*regexp.Regexp, error) {
61+
return rc.getRegexpInternal(key, flags)
62+
}
63+
64+
func (rc *RegexpCache) getRegexpInternal(
65+
key RegexpCacheKey, flags syntax.Flags,
66+
) (*regexp.Regexp, error) {
5067
if rc != nil {
5168
re := rc.lookup(key)
5269
if re != nil {
@@ -58,8 +75,19 @@ func (rc *RegexpCache) GetRegexp(key RegexpCacheKey) (*regexp.Regexp, error) {
5875
if err != nil {
5976
return nil, err
6077
}
61-
62-
re, err := regexp.Compile(pattern)
78+
var re *regexp.Regexp
79+
if flags == syntax.Perl {
80+
// Avoid the redundant 'parse - stringify - parse (within Compile)'
81+
// sequence in the common case.
82+
re, err = regexp.Compile(pattern)
83+
} else {
84+
var parsed *syntax.Regexp
85+
parsed, err = syntax.Parse(pattern, flags)
86+
if err != nil {
87+
return nil, err
88+
}
89+
re, err = regexp.Compile(parsed.String())
90+
}
6391
if err != nil {
6492
return nil, err
6593
}

pkg/util/jsonpath/eval/operation.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ func evalRegexFunc(op jsonpath.Operation, l, _ json.JSON) (jsonpathBool, error)
183183
return jsonpathBoolUnknown, err
184184
}
185185

186-
r, err := parser.ReCache.GetRegexp(regexOp)
186+
r, err := parser.ReCache.GetRegexpWithFlags(regexOp, regexOp.Flags)
187187
if err != nil {
188188
return jsonpathBoolUnknown, err
189189
}

pkg/util/jsonpath/parser/jsonpath.y

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,9 +171,13 @@ func extractNumericScalar(expr jsonpath.Path) (jsonpath.Scalar, bool) {
171171
return scalar, true
172172
}
173173

174-
func regexBinaryOp(left jsonpath.Path, regex string) (jsonpath.Operation, error) {
175-
r := jsonpath.Regex{Regex: regex}
176-
_, err := ReCache.GetRegexp(r)
174+
func regexBinaryOp(left jsonpath.Path, regex string, flags string) (jsonpath.Operation, error) {
175+
goFlags, err := jsonpath.RegexFlagsToGoFlags(flags)
176+
if err != nil {
177+
return jsonpath.Operation{}, err
178+
}
179+
r := jsonpath.Regex{Regex: regex, Flags: goFlags}
180+
_, err = ReCache.GetRegexpWithFlags(r, goFlags)
177181
if err != nil {
178182
return jsonpath.Operation{}, pgerror.Wrapf(err, pgcode.InvalidRegularExpression,
179183
"invalid regular expression")
@@ -493,15 +497,19 @@ predicate:
493497
}
494498
| expr LIKE_REGEX STR
495499
{
496-
regex, err := regexBinaryOp($1.path(), $3)
500+
regex, err := regexBinaryOp($1.path(), $3, "")
497501
if err != nil {
498502
return setErr(jsonpathlex, err)
499503
}
500504
$$.val = regex
501505
}
502506
| expr LIKE_REGEX STR FLAG STR
503507
{
504-
return unimplemented(jsonpathlex, "regex with flags")
508+
regex, err := regexBinaryOp($1.path(), $3, $5)
509+
if err != nil {
510+
return setErr(jsonpathlex, err)
511+
}
512+
$$.val = regex
505513
}
506514
;
507515

pkg/util/jsonpath/parser/testdata/jsonpath

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,50 @@ $.a.ceiling()
697697
----
698698
$."a".ceiling() -- normalized!
699699

700+
parse
701+
"a" like_regex ".*" flag ""
702+
----
703+
("a" like_regex ".*") -- normalized!
704+
705+
error
706+
"a" like_regex ".*" flag " "
707+
----
708+
at or near " ": syntax error: unrecognized flag character ' ' in LIKE_REGEX predicate
709+
DETAIL: source SQL:
710+
"a" like_regex ".*" flag " "
711+
^
712+
713+
error
714+
"a" like_regex ".*" flag "foo"
715+
----
716+
at or near "foo": syntax error: unrecognized flag character 'f' in LIKE_REGEX predicate
717+
DETAIL: source SQL:
718+
"a" like_regex ".*" flag "foo"
719+
^
720+
721+
error
722+
"a" like_regex ".*" flag "x"
723+
----
724+
at or near "x": syntax error: XQuery "x" flag (expanded regular expressions) is not implemented
725+
DETAIL: source SQL:
726+
"a" like_regex ".*" flag "x"
727+
^
728+
729+
parse
730+
"a" like_regex ".*" flag "ii"
731+
----
732+
("a" like_regex ".*" flag "i") -- normalized!
733+
734+
parse
735+
"a" like_regex ".*" flag "si"
736+
----
737+
("a" like_regex ".*" flag "is") -- normalized!
738+
739+
parse
740+
"a" like_regex ".*" flag "qqqmmmsssiii"
741+
----
742+
("a" like_regex ".*" flag "ismq") -- normalized!
743+
700744
# parse
701745
# $.1a
702746
# ----

0 commit comments

Comments
 (0)