Skip to content

Commit 0c2fa1a

Browse files
authored
refactor(query): refactor json functions (#16840)
1 parent 57481f4 commit 0c2fa1a

File tree

20 files changed

+864
-577
lines changed

20 files changed

+864
-577
lines changed

Cargo.lock

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,7 @@ backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git", rev = "7226
633633
color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" }
634634
deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "c149502" }
635635
ethnum = { git = "https://github.com/datafuse-extras/ethnum-rs", rev = "4cb05f1" }
636+
jsonb = { git = "https://github.com/databendlabs/jsonb", rev = "37d07f0" }
636637
map-api = { git = "https://github.com/databendlabs/map-api", tag = "v0.2.3" }
637638
openai_api_rust = { git = "https://github.com/datafuse-extras/openai-api", rev = "819a0ed" }
638639
openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.9" }

src/common/native/tests/it/native/io.rs

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,31 @@ pub fn new_test_column() -> Vec<Column> {
5353
Column::Binary(BinaryColumn::from_iter(
5454
["abcdefg", "mn", "11", "", "3456", "xyz"].iter(),
5555
)),
56+
// use binary jsonb format values to test
5657
Column::Variant(BinaryColumn::from_iter(
57-
["abcdefg", "mn", "11", "", "3456", "xyz"].iter(),
58+
[
59+
// null
60+
vec![0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00],
61+
// "abc"
62+
vec![
63+
0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x03, 0x61, 0x62, 0x63,
64+
],
65+
// 123
66+
vec![0x20, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x02, 0x50, 0x7B],
67+
// []
68+
vec![0x80, 0x00, 0x00, 0x00],
69+
// [1,2]
70+
vec![
71+
0x80, 0x00, 0x00, 0x02, 0x20, 0x00, 0x00, 0x02, 0x20, 0x00, 0x00, 0x02, 0x50,
72+
0x01, 0x50, 0x02,
73+
],
74+
// {"k":"v"}
75+
vec![
76+
0x40, 0x00, 0x00, 0x01, 0x10, 0x00, 0x00, 0x01, 0x10, 0x00, 0x00, 0x01, 0x6B,
77+
0x76,
78+
],
79+
]
80+
.iter(),
5881
)),
5982
]
6083
}

src/query/expression/src/types/variant.rs

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ use databend_common_io::deserialize_bitmap;
2020
use geozero::wkb::Ewkb;
2121
use geozero::ToJson;
2222
use jiff::tz::TimeZone;
23+
use jsonb::OwnedJsonb;
24+
use jsonb::RawJsonb;
2325
use jsonb::Value;
2426

2527
use super::binary::BinaryColumn;
@@ -182,7 +184,9 @@ impl ValueType for VariantType {
182184

183185
#[inline(always)]
184186
fn compare(lhs: Self::ScalarRef<'_>, rhs: Self::ScalarRef<'_>) -> Ordering {
185-
jsonb::compare(lhs, rhs).expect("unable to parse jsonb value")
187+
let left_jsonb = RawJsonb::new(lhs);
188+
let right_jsonb = RawJsonb::new(rhs);
189+
left_jsonb.cmp(&right_jsonb)
186190
}
187191
}
188192

@@ -235,7 +239,9 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec<u8
235239
ScalarRef::Interval(i) => interval_to_string(&i).to_string().into(),
236240
ScalarRef::Array(col) => {
237241
let items = cast_scalars_to_variants(col.iter(), tz);
238-
jsonb::build_array(items.iter(), buf).expect("failed to build jsonb array");
242+
let owned_jsonb = OwnedJsonb::build_array(items.iter().map(RawJsonb::new))
243+
.expect("failed to build jsonb array");
244+
buf.extend_from_slice(owned_jsonb.as_ref());
239245
return;
240246
}
241247
ScalarRef::Map(col) => {
@@ -255,8 +261,10 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec<u8
255261
cast_scalar_to_variant(v, tz, &mut val);
256262
kvs.insert(key, val);
257263
}
258-
jsonb::build_object(kvs.iter().map(|(k, v)| (k, &v[..])), buf)
259-
.expect("failed to build jsonb object from map");
264+
let owned_jsonb =
265+
OwnedJsonb::build_object(kvs.iter().map(|(k, v)| (k, RawJsonb::new(&v[..]))))
266+
.expect("failed to build jsonb object from map");
267+
buf.extend_from_slice(owned_jsonb.as_ref());
260268
return;
261269
}
262270
ScalarRef::Bitmap(b) => {
@@ -272,14 +280,14 @@ pub fn cast_scalar_to_variant(scalar: ScalarRef, tz: &TimeZone, buf: &mut Vec<u8
272280
}
273281
ScalarRef::Tuple(fields) => {
274282
let values = cast_scalars_to_variants(fields, tz);
275-
jsonb::build_object(
283+
let owned_jsonb = OwnedJsonb::build_object(
276284
values
277285
.iter()
278286
.enumerate()
279-
.map(|(i, bytes)| (format!("{}", i + 1), bytes)),
280-
buf,
287+
.map(|(i, bytes)| (format!("{}", i + 1), RawJsonb::new(bytes))),
281288
)
282289
.expect("failed to build jsonb object from tuple");
290+
buf.extend_from_slice(owned_jsonb.as_ref());
283291
return;
284292
}
285293
ScalarRef::Variant(bytes) => {

src/query/expression/src/utils/display.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use databend_common_io::geo_to_ewkt;
2828
use geozero::wkb::Ewkb;
2929
use itertools::Itertools;
3030
use jiff::tz::TimeZone;
31+
use jsonb::RawJsonb;
3132
use num_traits::FromPrimitive;
3233
use rust_decimal::Decimal;
3334
use rust_decimal::RoundingStrategy;
@@ -262,7 +263,8 @@ impl Display for ScalarRef<'_> {
262263
write!(f, ")")
263264
}
264265
ScalarRef::Variant(s) => {
265-
let value = jsonb::to_string(s);
266+
let raw_jsonb = RawJsonb::new(s);
267+
let value = raw_jsonb.to_string();
266268
write!(f, "'{value}'")
267269
}
268270
ScalarRef::Geometry(s) => {

src/query/expression/src/utils/variant_transform.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
use databend_common_exception::ErrorCode;
1616
use databend_common_exception::Result;
1717
use jsonb::parse_value;
18-
use jsonb::to_string;
18+
use jsonb::RawJsonb;
1919

2020
use crate::types::AnyType;
2121
use crate::types::DataType;
@@ -96,7 +96,8 @@ fn transform_scalar(scalar: ScalarRef<'_>, decode: bool) -> Result<Scalar> {
9696
}
9797
ScalarRef::Variant(data) => {
9898
if decode {
99-
Scalar::Variant(to_string(data).into_bytes())
99+
let raw_jsonb = RawJsonb::new(data);
100+
Scalar::Variant(raw_jsonb.to_string().into_bytes())
100101
} else {
101102
let value = parse_value(data).map_err(|err| {
102103
ErrorCode::UDFDataError(format!("parse json value error: {err}"))

src/query/expression/src/values.rs

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ use geo::Point;
3939
use geozero::CoordDimensions;
4040
use geozero::ToWkb;
4141
use itertools::Itertools;
42+
use jsonb::RawJsonb;
4243
use roaring::RoaringTreemap;
4344
use serde::de::Visitor;
4445
use serde::Deserialize;
@@ -776,7 +777,9 @@ impl PartialOrd for Scalar {
776777
(Scalar::Bitmap(b1), Scalar::Bitmap(b2)) => b1.partial_cmp(b2),
777778
(Scalar::Tuple(t1), Scalar::Tuple(t2)) => t1.partial_cmp(t2),
778779
(Scalar::Variant(v1), Scalar::Variant(v2)) => {
779-
jsonb::compare(v1.as_slice(), v2.as_slice()).ok()
780+
let left_jsonb = RawJsonb::new(v1);
781+
let right_jsonb = RawJsonb::new(v2);
782+
left_jsonb.partial_cmp(&right_jsonb)
780783
}
781784
(Scalar::Geometry(g1), Scalar::Geometry(g2)) => compare_geometry(g1, g2),
782785
(Scalar::Geography(g1), Scalar::Geography(g2)) => g1.partial_cmp(g2),
@@ -814,7 +817,11 @@ impl<'b> PartialOrd<ScalarRef<'b>> for ScalarRef<'_> {
814817
(ScalarRef::Map(m1), ScalarRef::Map(m2)) => m1.partial_cmp(m2),
815818
(ScalarRef::Bitmap(b1), ScalarRef::Bitmap(b2)) => b1.partial_cmp(b2),
816819
(ScalarRef::Tuple(t1), ScalarRef::Tuple(t2)) => t1.partial_cmp(t2),
817-
(ScalarRef::Variant(v1), ScalarRef::Variant(v2)) => jsonb::compare(v1, v2).ok(),
820+
(ScalarRef::Variant(v1), ScalarRef::Variant(v2)) => {
821+
let left_jsonb = RawJsonb::new(v1);
822+
let right_jsonb = RawJsonb::new(v2);
823+
left_jsonb.partial_cmp(&right_jsonb)
824+
}
818825
(ScalarRef::Geometry(g1), ScalarRef::Geometry(g2)) => compare_geometry(g1, g2),
819826
(ScalarRef::Geography(g1), ScalarRef::Geography(g2)) => g1.partial_cmp(g2),
820827
(ScalarRef::Interval(i1), ScalarRef::Interval(i2)) => i1.partial_cmp(i2),
@@ -913,9 +920,13 @@ impl PartialOrd for Column {
913920
col1.iter().partial_cmp(col2.iter())
914921
}
915922
(Column::Tuple(fields1), Column::Tuple(fields2)) => fields1.partial_cmp(fields2),
916-
(Column::Variant(col1), Column::Variant(col2)) => col1
917-
.iter()
918-
.partial_cmp_by(col2.iter(), |v1, v2| jsonb::compare(v1, v2).ok()),
923+
(Column::Variant(col1), Column::Variant(col2)) => {
924+
col1.iter().partial_cmp_by(col2.iter(), |v1, v2| {
925+
let left_jsonb = RawJsonb::new(v1);
926+
let right_jsonb = RawJsonb::new(v2);
927+
left_jsonb.partial_cmp(&right_jsonb)
928+
})
929+
}
919930
(Column::Geometry(col1), Column::Geometry(col2)) => {
920931
col1.iter().partial_cmp_by(col2.iter(), compare_geometry)
921932
}
@@ -1423,7 +1434,7 @@ impl Column {
14231434
DataType::Variant => {
14241435
let mut data = Vec::with_capacity(len);
14251436
for _ in 0..len {
1426-
let val = jsonb::rand_value();
1437+
let val = jsonb::Value::rand_value();
14271438
data.push(val.to_vec());
14281439
}
14291440
VariantType::from_data(data)

src/query/expression/tests/it/row.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ use databend_common_expression::RowConverter;
2828
use databend_common_expression::SortField;
2929
use ethnum::i256;
3030
use itertools::Itertools;
31-
use jsonb::convert_to_comparable;
3231
use jsonb::parse_value;
32+
use jsonb::RawJsonb;
3333
use rand::distributions::Alphanumeric;
3434
use rand::distributions::Standard;
3535
use rand::prelude::Distribution;
@@ -375,7 +375,9 @@ fn test_variant() {
375375
validity.push(true);
376376
let val = parse_value(value.as_bytes()).unwrap();
377377
let buf = val.to_vec();
378-
convert_to_comparable(&buf, &mut builder.data);
378+
let raw_jsonb = RawJsonb::new(&buf);
379+
let compare_buf = raw_jsonb.convert_to_comparable();
380+
builder.put_slice(&compare_buf);
379381
} else {
380382
validity.push(false);
381383
}

src/query/formats/src/field_encoder/json.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use databend_common_io::constants::NULL_BYTES_LOWER;
2121
use databend_common_io::constants::TRUE_BYTES_LOWER;
2222
use geozero::wkb::Ewkb;
2323
use geozero::ToJson;
24+
use jsonb::RawJsonb;
2425

2526
use crate::field_encoder::helpers::write_json_string;
2627
use crate::field_encoder::FieldEncoderValues;
@@ -81,7 +82,7 @@ impl FieldEncoderJSON {
8182

8283
Column::Variant(c) => {
8384
let v = unsafe { c.index_unchecked(row_index) };
84-
out_buf.extend_from_slice(jsonb::to_string(v).as_bytes());
85+
out_buf.extend_from_slice(RawJsonb::new(v).to_string().as_bytes());
8586
}
8687
Column::Geometry(c) => {
8788
let v = unsafe { c.index_unchecked(row_index) };

src/query/formats/src/field_encoder/values.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ use databend_common_io::geo_to_wkt;
4545
use databend_common_io::GeometryDataType;
4646
use geozero::wkb::Ewkb;
4747
use jiff::tz::TimeZone;
48+
use jsonb::RawJsonb;
4849
use lexical_core::ToLexical;
4950
use micromarshal::Marshal;
5051
use micromarshal::Unmarshal;
@@ -322,7 +323,7 @@ impl FieldEncoderValues {
322323
in_nested: bool,
323324
) {
324325
let v = unsafe { column.index_unchecked(row_index) };
325-
let s = jsonb::to_string(v);
326+
let s = RawJsonb::new(v).to_string();
326327
self.write_string_inner(s.as_bytes(), out_buf, in_nested);
327328
}
328329

0 commit comments

Comments
 (0)