Skip to content

Commit ab86edb

Browse files
authored
feat(query): Enhance JSON parsing to support extended JSON5 syntax (#18550)
1 parent 628d1d2 commit ab86edb

File tree

6 files changed

+104
-28
lines changed

6 files changed

+104
-28
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ jaq-interpret = "1.5.0"
359359
jaq-parse = "1.0.3"
360360
jaq-std = "1.6.0"
361361
jiff = { version = "0.2.10", features = ["serde", "tzdb-bundle-always"] }
362-
jsonb = "0.5.3"
362+
jsonb = "0.5.4"
363363
jwt-simple = { version = "0.12.10", default-features = false, features = ["pure-rust"] }
364364
lenient_semver = "0.4.2"
365365
levenshtein_automata = "0.2.1"

src/query/functions/tests/it/scalars/testdata/variant.txt

Lines changed: 68 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,6 @@ output domain : {NULL}
77
output : NULL
88

99

10-
error:
11-
--> SQL:1:1
12-
|
13-
1 | parse_json('nuLL')
14-
| ^^^^^^^^^^^^^^^^^^ expected ident, pos 3 while evaluating function `parse_json('nuLL')` in expr `CAST('nuLL' AS Variant)`
15-
16-
17-
1810
ast : parse_json('null')
1911
raw expr : parse_json('null')
2012
checked expr : CAST<String>("null" AS Variant)
@@ -24,14 +16,6 @@ output domain : Undefined
2416
output : 'null'
2517

2618

27-
error:
28-
--> SQL:1:1
29-
|
30-
1 | parse_json(' ')
31-
| ^^^^^^^^^^^^^^^^ EOF while parsing a value, pos 2 while evaluating function `parse_json(' ')` in expr `CAST(' ' AS Variant)`
32-
33-
34-
3519
ast : parse_json('true')
3620
raw expr : parse_json('true')
3721
checked expr : CAST<String>("true" AS Variant)
@@ -146,6 +130,24 @@ evaluation (internal):
146130
+--------+------------------------------------------------------------------------------------------------------------------------------------+
147131

148132

133+
ast : parse_json(' ')
134+
raw expr : parse_json(' ')
135+
checked expr : CAST<String>(" " AS Variant)
136+
optimized expr : 0x2000000000000000
137+
output type : Variant
138+
output domain : Undefined
139+
output : 'null'
140+
141+
142+
ast : parse_json('nuLL')
143+
raw expr : parse_json('nuLL')
144+
checked expr : CAST<String>("nuLL" AS Variant)
145+
optimized expr : 0x2000000000000000
146+
output type : Variant
147+
output domain : Undefined
148+
output : 'null'
149+
150+
149151
ast : parse_json('+10')
150152
raw expr : parse_json('+10')
151153
checked expr : CAST<String>("+10" AS Variant)
@@ -182,6 +184,24 @@ output domain : Undefined
182184
output : '12'
183185

184186

187+
ast : parse_json('0xabc')
188+
raw expr : parse_json('0xabc')
189+
checked expr : CAST<String>("0xabc" AS Variant)
190+
optimized expr : 0x2000000020000003500abc
191+
output type : Variant
192+
output domain : Undefined
193+
output : '2748'
194+
195+
196+
ast : parse_json('0x12abc.def')
197+
raw expr : parse_json('0x12abc.def')
198+
checked expr : CAST<String>("0x12abc.def" AS Variant)
199+
optimized expr : 0x20000000200000096040f2abcdef000000
200+
output type : Variant
201+
output domain : Undefined
202+
output : '76476.87084960938'
203+
204+
185205
ast : parse_json('99999999999999999999999999999999999999')
186206
raw expr : parse_json('99999999999999999999999999999999999999')
187207
checked expr : CAST<String>("99999999999999999999999999999999999999" AS Variant)
@@ -191,6 +211,15 @@ output domain : Undefined
191211
output : '99999999999999999999999999999999999999'
192212

193213

214+
ast : parse_json('\'single quoted string\'')
215+
raw expr : parse_json('\'single quoted string\'')
216+
checked expr : CAST<String>("'single quoted string'" AS Variant)
217+
optimized expr : 0x200000001000001473696e676c652071756f74656420737472696e67
218+
output type : Variant
219+
output domain : Undefined
220+
output : '"single quoted string"'
221+
222+
194223
ast : parse_json('[1,2,,4]')
195224
raw expr : parse_json('[1,2,,4]')
196225
checked expr : CAST<String>("[1,2,,4]" AS Variant)
@@ -200,6 +229,23 @@ output domain : Undefined
200229
output : '[1,2,null,4]'
201230

202231

232+
ast : parse_json('{ key :"val", key123_$测试 :"val" }')
233+
raw expr : parse_json('{ key :"val", key123_$测试 :"val" }')
234+
checked expr : CAST<String>("{ key :\"val\", key123_$测试 :\"val\" }" AS Variant)
235+
optimized expr : 0x40000002100000031000000e10000003100000036b65796b65793132335f24e6b58be8af9576616c76616c
236+
output type : Variant
237+
output domain : Undefined
238+
output : '{"key":"val","key123_$测试":"val"}'
239+
240+
241+
error:
242+
--> SQL:1:1
243+
|
244+
1 | parse_json('{ 123 :"val" }')
245+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ object attribute name cannot be a number, pos 3 while evaluating function `parse_json('{ 123 :"val" }')` in expr `CAST('{ 123 :"val" }' AS Variant)`
246+
247+
248+
203249
ast : try_parse_json(NULL)
204250
raw expr : try_parse_json(NULL)
205251
checked expr : try_parse_json<Variant NULL>(CAST<NULL>(NULL AS Variant NULL))
@@ -212,10 +258,10 @@ output : NULL
212258
ast : try_parse_json('nuLL')
213259
raw expr : try_parse_json('nuLL')
214260
checked expr : try_parse_json<String>("nuLL")
215-
optimized expr : NULL
261+
optimized expr : 0x2000000000000000
216262
output type : Variant NULL
217-
output domain : {NULL}
218-
output : NULL
263+
output domain : Undefined
264+
output : 'null'
219265

220266

221267
ast : try_parse_json('null')
@@ -363,10 +409,10 @@ output : NULL
363409
ast : check_json('nuLL')
364410
raw expr : check_json('nuLL')
365411
checked expr : check_json<String>("nuLL")
366-
optimized expr : "expected ident, pos 3"
412+
optimized expr : NULL
367413
output type : String NULL
368-
output domain : {"expected ident, pos 3"..="expected ident, pos 3"}
369-
output : 'expected ident, pos 3'
414+
output domain : {NULL}
415+
output : NULL
370416

371417

372418
ast : check_json(s)

src/query/functions/tests/it/scalars/variant.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,7 @@ fn test_variant() {
8989

9090
fn test_parse_json(file: &mut impl Write) {
9191
run_ast(file, "parse_json(NULL)", &[]);
92-
run_ast(file, "parse_json('nuLL')", &[]);
9392
run_ast(file, "parse_json('null')", &[]);
94-
run_ast(file, "parse_json(' ')", &[]);
9593
run_ast(file, "parse_json('true')", &[]);
9694
run_ast(file, "parse_json('false')", &[]);
9795
run_ast(file, "parse_json('\"测试\"')", &[]);
@@ -124,16 +122,27 @@ fn test_parse_json(file: &mut impl Write) {
124122
)]);
125123

126124
// json extension syntax
125+
run_ast(file, "parse_json(' ')", &[]);
126+
run_ast(file, "parse_json('nuLL')", &[]);
127127
run_ast(file, "parse_json('+10')", &[]);
128128
run_ast(file, "parse_json('001')", &[]);
129129
run_ast(file, "parse_json('.12')", &[]);
130130
run_ast(file, "parse_json('12.')", &[]);
131+
run_ast(file, "parse_json('0xabc')", &[]);
132+
run_ast(file, "parse_json('0x12abc.def')", &[]);
131133
run_ast(
132134
file,
133135
"parse_json('99999999999999999999999999999999999999')",
134136
&[],
135137
);
138+
run_ast(file, r#"parse_json('\'single quoted string\'')"#, &[]);
136139
run_ast(file, "parse_json('[1,2,,4]')", &[]);
140+
run_ast(
141+
file,
142+
"parse_json('{ key :\"val\", key123_$测试 :\"val\" }')",
143+
&[],
144+
);
145+
run_ast(file, "parse_json('{ 123 :\"val\" }')", &[]);
137146
}
138147

139148
fn test_try_parse_json(file: &mut impl Write) {

tests/sqllogictests/suites/query/functions/02_0048_function_semi_structureds_parse_json.test

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,26 @@ select parse_json('[1,2, ,4]'), parse_json('[,2]'), parse_json('[1,]')
8484
----
8585
[1,2,null,4] [null,2] [1,null]
8686

87+
query TTTTT
88+
select parse_json(' '), parse_json('NULL'), parse_json('True'), parse_json('nan'), parse_json('+infinity')
89+
----
90+
null null true null null
91+
92+
query TTT
93+
select parse_json('0x123'), parse_json('0XABCDEF'), parse_json('0XABC.DEF')
94+
----
95+
291 11259375 2748.870849609375
96+
97+
query TT
98+
select parse_json('\'abc\''), parse_json('\'测试\'')
99+
----
100+
"abc" "测试"
101+
102+
query TT
103+
select parse_json('{key:"val"}'), parse_json('{_中文_$key123:"val"}')
104+
----
105+
{"key":"val"} {"_中文_$key123":"val"}
106+
87107
statement error 1006
88108
select parse_json('[1,')
89109

@@ -242,3 +262,4 @@ select to_variant(a), to_variant(b), to_variant(c) from t3
242262
statement ok
243263
DROP DATABASE db1
244264

265+

tests/sqllogictests/suites/stage/formats/csv/csv_types.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ create table iv(a int not null, b variant not null)
1717
query TIITI
1818
copy into iv from @data/csv/invalid_variant.csv FILE_FORMAT = (field_delimiter = '\t' record_delimiter = '\n' type = CSV) disable_variant_check = false ON_ERROR = CONTINUE
1919
----
20-
csv/invalid_variant.csv 1 1 Invalid value 'invalidvariant' for column 1 (b Variant): expected value, pos 1 1
20+
csv/invalid_variant.csv 1 1 Invalid value 'invalidvariant' for column 1 (b Variant): expected ident, pos 3 1
2121

2222
query IT
2323
select * from iv

0 commit comments

Comments
 (0)