Skip to content

Commit beca0a1

Browse files
authored
feat(query): Enhance JSON Parsing with Decimal Support and Extended Syntax (#18252)
* feat(query): Enhance JSON Parsing with Decimal Support and Extended Syntax
* fix
1 parent 904b33a commit beca0a1

File tree

23 files changed

+284
-397
lines changed

23 files changed

+284
-397
lines changed

Cargo.lock

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -362,7 +362,7 @@ jaq-interpret = "1.5.0"
362362
jaq-parse = "1.0.3"
363363
jaq-std = "1.6.0"
364364
jiff = { version = "0.2.10", features = ["serde", "tzdb-bundle-always"] }
365-
jsonb = "0.5.1"
365+
jsonb = "0.5.2"
366366
jwt-simple = { version = "0.12.10", default-features = false, features = ["pure-rust"] }
367367
lenient_semver = "0.4.2"
368368
levenshtein_automata = "0.2.1"
@@ -651,7 +651,6 @@ backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git", rev = "7226
651651
color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" }
652652
deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "9954bff" }
653653
display-more = { git = "https://github.com/databendlabs/display-more", tag = "v0.1.3" }
654-
jsonb = { git = "https://github.com/databendlabs/jsonb", rev = "dcaf261" }
655654
map-api = { git = "https://github.com/databendlabs/map-api", tag = "v0.2.3" }
656655
openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" }
657656
openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.9" }

src/meta/proto-conv/tests/it/v129_vector_datatype copy.rs

Lines changed: 0 additions & 275 deletions
This file was deleted.

src/query/ee/tests/it/storages/fuse/operations/virtual_columns_builder.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -245,7 +245,7 @@ async fn test_virtual_column_builder() -> Result<()> {
245245
"['geo']['lat']",
246246
)
247247
.unwrap();
248-
assert_eq!(meta_geo_lat.data_type, VariantDataType::Float64);
248+
assert_eq!(meta_geo_lat.data_type, VariantDataType::Jsonb);
249249

250250
let entries = vec![
251251
Int32Type::from_data(vec![1, 2, 3, 4, 5, 6, 7, 8]).into(),

src/query/expression/src/types/variant.rs

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -245,24 +245,21 @@ pub fn cast_scalar_to_variant(
245245
},
246246
ScalarRef::Decimal(x) => match x {
247247
DecimalScalar::Decimal64(value, size) => {
248-
let dec = jsonb::Decimal128 {
249-
precision: size.precision(),
248+
let dec = jsonb::Decimal64 {
250249
scale: size.scale(),
251-
value: value as i128,
250+
value,
252251
};
253-
jsonb::Value::Number(jsonb::Number::Decimal128(dec))
252+
jsonb::Value::Number(jsonb::Number::Decimal64(dec))
254253
}
255254
DecimalScalar::Decimal128(value, size) => {
256255
let dec = jsonb::Decimal128 {
257-
precision: size.precision(),
258256
scale: size.scale(),
259257
value,
260258
};
261259
jsonb::Value::Number(jsonb::Number::Decimal128(dec))
262260
}
263261
DecimalScalar::Decimal256(value, size) => {
264262
let dec = jsonb::Decimal256 {
265-
precision: size.precision(),
266263
scale: size.scale(),
267264
value: value.0,
268265
};

src/query/functions/src/scalars/decimal/src/cast_from_jsonb.rs

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -111,8 +111,25 @@ where
111111
float_to_decimal(F64::from(v), min, max, multiplier_f64, rounding_mode)
112112
.map(|v| Some(v))
113113
}
114+
JsonbNumber::Decimal64(d) => {
115+
let from_size = DecimalSize::new_unchecked(i64::MAX_PRECISION, d.scale);
116+
match dest_type {
117+
DecimalDataType::Decimal64(_) => {
118+
let x = d.value;
119+
let min = i64::min_for_precision(dest_size.precision());
120+
let max = i64::max_for_precision(dest_size.precision());
121+
decimal_to_decimal(x, min, max, from_size, dest_size, rounding_mode)
122+
.map(|v| Some(T::from_i128(v)))
123+
}
124+
DecimalDataType::Decimal128(_) | DecimalDataType::Decimal256(_) => {
125+
let x = T::from_i128(d.value);
126+
decimal_to_decimal(x, min, max, from_size, dest_size, rounding_mode)
127+
.map(|v| Some(v))
128+
}
129+
}
130+
}
114131
JsonbNumber::Decimal128(d) => {
115-
let from_size = DecimalSize::new_unchecked(d.precision, d.scale);
132+
let from_size = DecimalSize::new_unchecked(i128::MAX_PRECISION, d.scale);
116133
match dest_type {
117134
DecimalDataType::Decimal64(_) => {
118135
let x = d.value;
@@ -129,7 +146,7 @@ where
129146
}
130147
}
131148
JsonbNumber::Decimal256(d) => {
132-
let from_size = DecimalSize::new_unchecked(d.precision, d.scale);
149+
let from_size = DecimalSize::new_unchecked(i256::MAX_PRECISION, d.scale);
133150
match dest_type {
134151
DecimalDataType::Decimal64(_) | DecimalDataType::Decimal128(_) => {
135152
let x = i256(d.value);

src/query/functions/src/scalars/variant.rs

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,7 @@ pub fn register(registry: &mut FunctionRegistry) {
9999
registry.register_aliases("try_object_construct_keep_null", &[
100100
"try_json_object_keep_null",
101101
]);
102+
registry.register_aliases("is_float", &["is_double", "is_real"]);
102103

103104
registry.register_passthrough_nullable_1_arg::<VariantType, VariantType, _, _>(
104105
"parse_json",
@@ -1038,6 +1039,28 @@ pub fn register(registry: &mut FunctionRegistry) {
10381039
}),
10391040
);
10401041

1042+
registry.register_passthrough_nullable_1_arg::<VariantType, BooleanType, _, _>(
1043+
"is_decimal",
1044+
|_, _| FunctionDomain::Full,
1045+
vectorize_with_builder_1_arg::<VariantType, BooleanType>(|v, output, ctx| {
1046+
if let Some(validity) = &ctx.validity {
1047+
if !validity.get_bit(output.len()) {
1048+
output.push(false);
1049+
return;
1050+
}
1051+
}
1052+
match RawJsonb::new(v).as_number() {
1053+
Ok(Some(num)) => match num {
1054+
jsonb::Number::Float64(_) => output.push(false),
1055+
_ => output.push(true),
1056+
},
1057+
_ => {
1058+
output.push(false);
1059+
}
1060+
}
1061+
}),
1062+
);
1063+
10411064
registry.register_passthrough_nullable_1_arg::<VariantType, BooleanType, _, _>(
10421065
"is_string",
10431066
|_, _| FunctionDomain::Full,

src/query/functions/tests/it/aggregates/testdata/agg.txt

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1487,12 +1487,12 @@ evaluation (internal):
14871487

14881488
ast: json_array_agg(dec)
14891489
evaluation (internal):
1490-
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
1491-
| Column | Data |
1492-
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
1493-
| dec | NullableColumn { column: Decimal64([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } |
1494-
| Output | Variant([0x80000003200000132000001320000013700000000000000000000000000000006e0f0270000000000000000000000000000000dc0f02700000000000000000000000000000014a0f02]) |
1495-
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
1490+
+--------+-----------------------------------------------------------------------------------------------------------+
1491+
| Column | Data |
1492+
+--------+-----------------------------------------------------------------------------------------------------------+
1493+
| dec | NullableColumn { column: Decimal64([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } |
1494+
| Output | Variant([0x800000032000000a2000000a2000000a70000000000000006e027000000000000000dc0270000000000000014a02]) |
1495+
+--------+-----------------------------------------------------------------------------------------------------------+
14961496

14971497

14981498
error: Json object have duplicate key 'k'
@@ -1523,13 +1523,13 @@ error: json_object_agg does not support key type 'Number(Int64)'
15231523

15241524
ast: json_object_agg(s, dec)
15251525
evaluation (internal):
1526-
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1527-
| Column | Data |
1528-
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1529-
| s | StringColumn[abc, def, opq, xyz] |
1530-
| dec | NullableColumn { column: Decimal64([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } |
1531-
| Output | Variant([0x4000000310000003100000031000000320000013200000132000001361626364656678797a700000000000000000000000000000006e0f0270000000000000000000000000000000dc0f02700000000000000000000000000000014a0f02]) |
1532-
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1526+
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
1527+
| Column | Data |
1528+
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
1529+
| s | StringColumn[abc, def, opq, xyz] |
1530+
| dec | NullableColumn { column: Decimal64([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } |
1531+
| Output | Variant([0x400000031000000310000003100000032000000a2000000a2000000a61626364656678797a70000000000000006e027000000000000000dc0270000000000000014a02]) |
1532+
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
15331533

15341534

15351535
ast: mode(1)

src/query/functions/tests/it/aggregates/testdata/agg_group_by.txt

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1425,12 +1425,12 @@ evaluation (internal):
14251425

14261426
ast: json_array_agg(dec)
14271427
evaluation (internal):
1428-
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
1429-
| Column | Data |
1430-
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
1431-
| dec | NullableColumn { column: Decimal64([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } |
1432-
| Output | Variant([0x80000003200000132000001320000013700000000000000000000000000000006e0f0270000000000000000000000000000000dc0f02700000000000000000000000000000014a0f02]) |
1433-
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+
1428+
+--------+-----------------------------------------------------------------------------------------------------------+
1429+
| Column | Data |
1430+
+--------+-----------------------------------------------------------------------------------------------------------+
1431+
| dec | NullableColumn { column: Decimal64([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } |
1432+
| Output | Variant([0x800000032000000a2000000a2000000a70000000000000006e027000000000000000dc0270000000000000014a02]) |
1433+
+--------+-----------------------------------------------------------------------------------------------------------+
14341434

14351435

14361436
error: Json object have duplicate key 'k'
@@ -1461,13 +1461,13 @@ error: json_object_agg does not support key type 'Number(Int64)'
14611461

14621462
ast: json_object_agg(s, dec)
14631463
evaluation (internal):
1464-
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1465-
| Column | Data |
1466-
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1467-
| s | StringColumn[abc, def, opq, xyz] |
1468-
| dec | NullableColumn { column: Decimal64([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } |
1469-
| Output | Variant([0x4000000310000003100000031000000320000013200000132000001361626364656678797a700000000000000000000000000000006e0f0270000000000000000000000000000000dc0f02700000000000000000000000000000014a0f02]) |
1470-
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1464+
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
1465+
| Column | Data |
1466+
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
1467+
| s | StringColumn[abc, def, opq, xyz] |
1468+
| dec | NullableColumn { column: Decimal64([1.10, 2.20, 0.00, 3.30]), validity: [0b____1011] } |
1469+
| Output | Variant([0x400000031000000310000003100000032000000a2000000a2000000a61626364656678797a70000000000000006e027000000000000000dc0270000000000000014a02]) |
1470+
+--------+-----------------------------------------------------------------------------------------------------------------------------------------------------+
14711471

14721472

14731473
ast: mode(1)

src/query/functions/tests/it/scalars/testdata/cast.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -231,7 +231,7 @@ output : '-1'
231231
ast : CAST(1.1 AS VARIANT)
232232
raw expr : CAST(1.1 AS Variant)
233233
checked expr : CAST<Decimal(2, 1)>(1.1_d64(2,1) AS Variant)
234-
optimized expr : 0x2000000020000013700000000000000000000000000000000b0201
234+
optimized expr : 0x200000002000000a70000000000000000b01
235235
output type : Variant
236236
output domain : Undefined
237237
output : '1.1'
@@ -2409,7 +2409,7 @@ output : '-1'
24092409
ast : TRY_CAST(1.1 AS VARIANT)
24102410
raw expr : TRY_CAST(1.1 AS Variant)
24112411
checked expr : TRY_CAST<Decimal(2, 1)>(1.1_d64(2,1) AS Variant NULL)
2412-
optimized expr : 0x2000000020000013700000000000000000000000000000000b0201
2412+
optimized expr : 0x200000002000000a70000000000000000b01
24132413
output type : Variant NULL
24142414
output domain : Undefined
24152415
output : '1.1'

0 commit comments

Comments
 (0)