Skip to content

Commit 880272d

Browse files
craig[bot] and ZhouXing19 committed
Merge #149251
149251: jsonpath: allow case insensitivity for keyword ident r=ZhouXing19 a=ZhouXing19 Epic: None Fix: #144255 Previously, if a keyword was not written entirely in lowercase, it caused a syntax error. For example, `strIcT $.STRiCt == null` failed to parse with `at or near "strIcT": syntax error`. This is because the lexer saw `strIcT $.STRiCt == null` as `IDENT $.IDENT == NULL`, for which no rule exists in `jsonpath.y`. Note that the lexer recognizes `strIcT` and `STRiCt` as `IDENT` by [the id](https://github.com/cockroachdb/cockroach/blob/ac3b2b749ca61a781566557faf29daa08ae2f9b5/pkg/util/jsonpath/parser/lexer.go#L69-L79), not by the string. This commit therefore sets the keyword ID for every string whose lowercased form matches a keyword. More specifically, we now do `lval.SetID(lexbase.GetKeywordID(strings.ToLower(lval.Str())))` in the scanning logic. With the same example, `strIcT $.STRiCt == null` is now recognized as `STRICT $.STRICT == NULL` by the parser, for which a rule exists in the yacc grammar, so it passes the **Lexical Analysis** step. Then, at the **Syntactic Analysis** step, the keyword `STRICT` is normalized, while the property-access part, `$.STRiCt`, remains unchanged, because the original string `STRiCt` is still preserved. Release note (bug fix): Support any casing for keyword identifiers in a jsonpath query. Note that for the 3 special identifiers "TRUE", "FALSE", and "NULL", crdb parses them case-insensitively, but PG parses them case-sensitively, e.g. `select '$.active == TrUe'::jsonpath;` will work in crdb but not in pg. Co-authored-by: ZhouXing19 <[email protected]>
2 parents f4104d4 + 8219048 commit 880272d

File tree

4 files changed

+207
-2
lines changed

4 files changed

+207
-2
lines changed

pkg/sql/logictest/testdata/logic_test/jsonb_path_query

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,12 @@ SELECT jsonb_path_query('{"a": [1,2,3]}', 'strict $.a ? (1 == 1)');
629629
----
630630
[1, 2, 3]
631631

632+
# Ensure keyword ("strict") is parsed case-insensitively.
633+
query T
634+
SELECT jsonb_path_query('{"a": [1,2,3]}', 'StriCt $.a ? (1 == 1)');
635+
----
636+
[1, 2, 3]
637+
632638
query empty
633639
SELECT jsonb_path_query('{"a": [1,2,3]}', 'strict $.a ? (1 != 1)');
634640

@@ -913,6 +919,13 @@ SELECT jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\
913919
"abc"
914920
"abdacb"
915921

922+
# Ensure keywords ("lax", "like_regex") are parsed case-insensitively.
923+
query T rowsort
924+
SELECT jsonb_path_query('[null, 1, "abc", "abd", "aBdC", "abdacb", "babc", "adc\nabc", "ab\nadc"]', 'LaX $[*] ? (@ like_rEgeX "^ab.*c")');
925+
----
926+
"abc"
927+
"abdacb"
928+
916929
query T
917930
SELECT jsonb_path_query('"He said \"Hello\\World!\""', '$ ? (@ like_regex ".*\"H.*\\\\.*!.*\".*")');
918931
----
@@ -1059,6 +1072,12 @@ SELECT jsonb_path_query('[1, 2, 3, 4]', '$[last]');
10591072
----
10601073
4
10611074

1075+
# Ensure keyword ("last") is parsed case-insensitively.
1076+
query T
1077+
SELECT jsonb_path_query('[1, 2, 3, 4]', '$[LaSt]');
1078+
----
1079+
4
1080+
10621081
query T
10631082
SELECT jsonb_path_query('"hello"', '$[last]');
10641083
----
@@ -1107,6 +1126,12 @@ SELECT jsonb_path_query('{}', '(null like_regex "^he.*$") is unknown');
11071126
----
11081127
true
11091128

1129+
# Ensure keyword ("like_regex") is parsed case-insensitively.
1130+
query T
1131+
SELECT jsonb_path_query('{}', '(null like_RegEx "^he.*$") is unknown');
1132+
----
1133+
true
1134+
11101135
query T
11111136
SELECT jsonb_path_query('"abcdef"', '$ starts with "abc"');
11121137
----
@@ -1898,3 +1923,63 @@ query T
18981923
SELECT jsonb_path_query('"Line1\nLine2"', '$ like_regex "^Line1\nLine2$" flag "mq"');
18991924
----
19001925
false
1926+
1927+
1928+
# If a key happens to be spelled like a keyword, its original casing is kept (it is not normalized).
1929+
query T
1930+
SELECT jsonb_path_query('{"STRICT": 1}'::JSONB, 'strIct $.STRICT'::JSONPATH);
1931+
----
1932+
1
1933+
1934+
query T
1935+
SELECT jsonb_path_query('{"STRICT": 1}'::JSONB, 'lax $.STRICT'::JSONPATH);
1936+
----
1937+
1
1938+
1939+
query T
1940+
SELECT jsonb_path_query('{"STRICT": 1}'::JSONB, 'lax $.strict'::JSONPATH);
1941+
----
1942+
1943+
query T
1944+
SELECT jsonb_path_query('{"STRICt": 1}'::JSONB, '$.STRICt'::JSONPATH);
1945+
----
1946+
1
1947+
1948+
query T
1949+
SELECT jsonb_path_query('{"STRICt": 1}'::JSONB, '$.STRICT'::JSONPATH);
1950+
----
1951+
1952+
query T
1953+
SELECT jsonb_path_query('{"strict": 1}'::JSONB, '$.STRICT'::JSONPATH);
1954+
----
1955+
1956+
query T
1957+
SELECT jsonb_path_query('{"strict": 1}'::JSONB, 'lax $.STRICT'::JSONPATH);
1958+
----
1959+
1960+
query T
1961+
SELECT jsonb_path_query('{"strict": 1}'::JSONB, '$.STRICt'::JSONPATH);
1962+
----
1963+
1964+
query T
1965+
SELECT jsonb_path_query('{"strict": 1}'::JSONB, 'lax $.STRICt'::JSONPATH);
1966+
----
1967+
1968+
query T
1969+
SELECT jsonb_path_query('{"strict": 1}'::JSONB, '$.strict'::JSONPATH);
1970+
----
1971+
1
1972+
1973+
query T
1974+
SELECT jsonb_path_query('{"LIKE_REGEX": 1}'::JSONB, '$.LIKE_REGEX'::JSONPATH);
1975+
----
1976+
1
1977+
1978+
query T
1979+
SELECT jsonb_path_query('{"LIKE_REGEx": 1}'::JSONB, '$.LIKE_REGEx'::JSONPATH);
1980+
----
1981+
1
1982+
1983+
query T
1984+
SELECT jsonb_path_query('{"LIKE_REGEx": 1}'::JSONB, '$.LIKE_REGEX'::JSONPATH);
1985+
----

pkg/sql/logictest/testdata/logic_test/jsonpath

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,23 @@ SELECT 'strict $'::JSONPATH
2727
----
2828
strict $
2929

30+
# Ensure keyword ("strict") is parsed case-insensitively.
31+
query T
32+
SELECT 'sTrict $'::JSONPATH
33+
----
34+
strict $
35+
3036
query T
3137
SELECT 'lax $'::JSONPATH
3238
----
3339
$
3440

41+
# Ensure keyword ("lax") is parsed case-insensitively.
42+
query T
43+
SELECT 'LaX $'::JSONPATH
44+
----
45+
$
46+
3547
query T
3648
SELECT '$.a1[*]'::JSONPATH
3749
----
@@ -210,6 +222,13 @@ SELECT '$.*'::JSONPATH
210222
----
211223
$.*
212224

225+
# keyword "strict" is case-insensitive, but a key ("STRIcT") will never be normalized
226+
# even if it matches the same spelling of a keyword.
227+
query T
228+
SELECT 'strIct $.STRIcT'::JSONPATH
229+
----
230+
strict $."STRIcT"
231+
213232
## When we allow table creation
214233

215234
# statement ok

pkg/sql/scanner/jsonpath_scan.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
package scanner
77

88
import (
9+
"strings"
10+
911
sqllexbase "github.com/cockroachdb/cockroach/pkg/sql/lexbase"
1012
"github.com/cockroachdb/cockroach/pkg/util/jsonpath/parser/lexbase"
1113
)
@@ -136,9 +138,10 @@ func isIdentMiddle(ch int) bool {
136138

137139
// scanIdent is similar to Scanner.scanIdent, but uses Jsonpath tokens.
138140
func (s *JSONPathScanner) scanIdent(lval ScanSymType) {
139-
// TODO(#144255): Allow any case for specific identifiers (strict, lax, to)
140141
s.normalizeIdent(lval, isIdentMiddle, false /* toLower */)
141-
lval.SetID(lexbase.GetKeywordID(lval.Str()))
142+
// Postgres is case-insensitive for keywords, see
143+
// https://github.com/cockroachdb/cockroach/issues/144255.
144+
lval.SetID(lexbase.GetKeywordID(strings.ToLower(lval.Str())))
142145
}
143146

144147
// scanNumber is similar to Scanner.scanNumber, but uses Jsonpath tokens.

pkg/util/jsonpath/parser/testdata/jsonpath

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,104 @@ parse
741741
----
742742
("a" like_regex ".*" flag "ismq") -- normalized!
743743

744+
# Subtests with casing for idents.
745+
parse
746+
sTrict $
747+
----
748+
strict $ -- normalized!
749+
750+
parse
751+
Lax $
752+
----
753+
$ -- normalized!
754+
755+
parse
756+
$ ? (exiSts(1))
757+
----
758+
$?(exists (1)) -- normalized!
759+
760+
parse
761+
"a" like_Regex ".*" flAg "i"
762+
----
763+
("a" like_regex ".*" flag "i") -- normalized!
764+
765+
parse
766+
(1 == 1) Is unKnown
767+
----
768+
(1 == 1) is unknown -- normalized!
769+
770+
parse
771+
$[1 tO 5, lAst]
772+
----
773+
$[1 to 5,last] -- normalized!
774+
775+
parse
776+
"" sTarts With ""
777+
----
778+
("" starts with "") -- normalized!
779+
780+
parse
781+
$.active == false
782+
----
783+
($."active" == false) -- normalized!
784+
785+
# This will not work for PG as it is case-sensitive for "false" ident.
786+
# But for crdb we're case-insensitive for all idents.
787+
parse
788+
$.active == FaLse
789+
----
790+
($."active" == false) -- normalized!
791+
792+
parse
793+
$.active == true
794+
----
795+
($."active" == true) -- normalized!
796+
797+
# This will not work for PG as it is case-sensitive for "true" ident.
798+
# But for crdb we're case-insensitive for all idents.
799+
parse
800+
$.active == True
801+
----
802+
($."active" == true) -- normalized!
803+
804+
parse
805+
$.active == TrUe
806+
----
807+
($."active" == true) -- normalized!
808+
809+
# This will not work for PG as it is case-sensitive for "null" ident.
810+
# But for crdb we're case-insensitive for all idents.
811+
parse
812+
$.active == Null
813+
----
814+
($."active" == null) -- normalized!
815+
816+
parse
817+
$.active == null
818+
----
819+
($."active" == null) -- normalized!
820+
821+
# If an ident used as a key happens to be spelled like a keyword, its original casing is kept (it is not normalized).
822+
parse
823+
$.STRICT == null
824+
----
825+
($."STRICT" == null) -- normalized!
826+
827+
parse
828+
strIcT $.STRICT == null
829+
----
830+
strict ($."STRICT" == null) -- normalized!
831+
832+
parse
833+
$.STRiCt == null
834+
----
835+
($."STRiCt" == null) -- normalized!
836+
837+
parse
838+
strIcT $.STRiCt == null
839+
----
840+
strict ($."STRiCt" == null) -- normalized!
841+
744842
# parse
745843
# $.1a
746844
# ----

0 commit comments

Comments
 (0)