Skip to content

Commit 65240a4

Browse files
authored
refactor(query): Improve parse json performance (#18451)
perf(query): Improve parse json performance
1 parent e156b20 commit 65240a4

File tree

12 files changed

+247
-302
lines changed

12 files changed

+247
-302
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ jaq-interpret = "1.5.0"
366366
jaq-parse = "1.0.3"
367367
jaq-std = "1.6.0"
368368
jiff = { version = "0.2.10", features = ["serde", "tzdb-bundle-always"] }
369-
jsonb = "0.5.2"
369+
jsonb = "0.5.3"
370370
jwt-simple = { version = "0.12.10", default-features = false, features = ["pure-rust"] }
371371
lenient_semver = "0.4.2"
372372
levenshtein_automata = "0.2.1"

src/query/expression/src/types/variant.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -378,17 +378,15 @@ pub fn cast_scalar_to_variant(
378378
}
379379
ScalarRef::Geometry(bytes) => {
380380
let geom = Ewkb(bytes).to_json().expect("failed to decode wkb data");
381-
jsonb::parse_value(geom.as_bytes())
382-
.expect("failed to parse geojson to json value")
383-
.write_to_vec(buf);
381+
jsonb::parse_owned_jsonb_with_buf(geom.as_bytes(), buf)
382+
.expect("failed to parse geojson to json value");
384383
return;
385384
}
386385
ScalarRef::Geography(bytes) => {
387386
// todo: Implement direct conversion, omitting intermediate processes
388387
let geom = Ewkb(bytes.0).to_json().expect("failed to decode wkb data");
389-
jsonb::parse_value(geom.as_bytes())
390-
.expect("failed to parse geojson to json value")
391-
.write_to_vec(buf);
388+
jsonb::parse_owned_jsonb_with_buf(geom.as_bytes(), buf)
389+
.expect("failed to parse geojson to json value");
392390
return;
393391
}
394392
ScalarRef::Vector(scalar) => with_vector_number_type!(|NUM_TYPE| match scalar {

src/query/expression/src/utils/variant_transform.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
use databend_common_exception::ErrorCode;
1616
use databend_common_exception::Result;
17-
use jsonb::parse_value;
17+
use jsonb::parse_owned_jsonb;
1818
use jsonb::RawJsonb;
1919

2020
use crate::types::AnyType;
@@ -101,7 +101,7 @@ fn transform_scalar(scalar: ScalarRef<'_>, decode: bool) -> Result<Scalar> {
101101
let raw_jsonb = RawJsonb::new(data);
102102
Scalar::Variant(raw_jsonb.to_string().into_bytes())
103103
} else {
104-
let value = parse_value(data).map_err(|err| {
104+
let value = parse_owned_jsonb(data).map_err(|err| {
105105
ErrorCode::UDFDataError(format!("parse json value error: {err}"))
106106
})?;
107107
Scalar::Variant(value.to_vec())

src/query/expression/tests/it/row.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ use databend_common_expression::FromData;
2828
use databend_common_expression::RowConverter;
2929
use databend_common_expression::SortField;
3030
use itertools::Itertools;
31-
use jsonb::parse_value;
32-
use jsonb::RawJsonb;
31+
use jsonb::parse_owned_jsonb;
3332
use rand::distributions::Alphanumeric;
3433
use rand::distributions::Standard;
3534
use rand::prelude::Distribution;
@@ -386,9 +385,8 @@ fn test_variant() {
386385
for value in values {
387386
if let Some(value) = value {
388387
validity.push(true);
389-
let val = parse_value(value.as_bytes()).unwrap();
390-
let buf = val.to_vec();
391-
let raw_jsonb = RawJsonb::new(&buf);
388+
let owned_jsonb = parse_owned_jsonb(value.as_bytes()).unwrap();
389+
let raw_jsonb = owned_jsonb.as_raw();
392390
let compare_buf = raw_jsonb.convert_to_comparable();
393391
builder.put_slice(&compare_buf);
394392
} else {

src/query/formats/src/field_decoder/fast_values.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ use databend_common_io::parse_bitmap;
6060
use databend_common_io::parse_bytes_to_ewkb;
6161
use databend_common_io::prelude::FormatSettings;
6262
use databend_common_io::Interval;
63-
use jsonb::parse_value;
63+
use jsonb::parse_owned_jsonb_with_buf;
6464
use lexical_core::FromLexical;
6565
use num_traits::NumCast;
6666

@@ -468,9 +468,8 @@ impl FastFieldDecoderValues {
468468
) -> Result<()> {
469469
let mut buf = Vec::new();
470470
self.read_string_inner(reader, &mut buf, positions)?;
471-
match parse_value(&buf) {
472-
Ok(value) => {
473-
value.write_to_vec(&mut column.data);
471+
match parse_owned_jsonb_with_buf(&buf, &mut column.data) {
472+
Ok(_) => {
474473
column.commit_row();
475474
}
476475
Err(_) => {

src/query/formats/src/field_decoder/nested.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ use databend_common_io::geography::geography_from_ewkt_bytes;
5353
use databend_common_io::parse_bitmap;
5454
use databend_common_io::parse_bytes_to_ewkb;
5555
use databend_common_io::Interval;
56-
use jsonb::parse_value;
56+
use jsonb::parse_owned_jsonb_with_buf;
5757
use lexical_core::FromLexical;
5858

5959
use crate::binary::decode_binary;
@@ -306,9 +306,8 @@ impl NestedValues {
306306
) -> Result<()> {
307307
let mut buf = Vec::new();
308308
self.read_string_inner(reader, &mut buf)?;
309-
match parse_value(&buf) {
310-
Ok(value) => {
311-
value.write_to_vec(&mut column.data);
309+
match parse_owned_jsonb_with_buf(&buf, &mut column.data) {
310+
Ok(_) => {
312311
column.commit_row();
313312
}
314313
Err(e) => {

src/query/formats/src/field_decoder/separated_text.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ use databend_common_io::parse_bytes_to_ewkb;
5151
use databend_common_io::Interval;
5252
use databend_common_meta_app::principal::CsvFileFormatParams;
5353
use databend_common_meta_app::principal::TsvFileFormatParams;
54-
use jsonb::parse_value;
54+
use jsonb::parse_owned_jsonb_with_buf;
5555
use lexical_core::FromLexical;
5656
use num_traits::NumCast;
5757

@@ -287,9 +287,8 @@ impl SeparatedTextDecoder {
287287
}
288288

289289
fn read_variant(&self, column: &mut BinaryColumnBuilder, data: &[u8]) -> Result<()> {
290-
match parse_value(data) {
291-
Ok(value) => {
292-
value.write_to_vec(&mut column.data);
290+
match parse_owned_jsonb_with_buf(data, &mut column.data) {
291+
Ok(_) => {
293292
column.commit_row();
294293
}
295294
Err(e) => {

src/query/functions/src/scalars/geographic/src/geometry.rs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ use geozero::CoordDimensions;
7575
use geozero::GeozeroGeometry;
7676
use geozero::ToGeo;
7777
use geozero::ToWkb;
78-
use jsonb::parse_value;
78+
use jsonb::parse_owned_jsonb_with_buf;
7979
use jsonb::RawJsonb;
8080
use num_traits::AsPrimitive;
8181
use proj4rs::transform::transform;
@@ -134,14 +134,11 @@ pub fn register(registry: &mut FunctionRegistry) {
134134
}
135135

136136
match ewkb_to_geo(&mut Ewkb(ewkb)).and_then(|(geo, _)| geo_to_json(geo)) {
137-
Ok(json) => match parse_value(json.as_bytes()) {
138-
Ok(json_val) => {
139-
json_val.write_to_vec(&mut builder.data);
140-
}
141-
Err(e) => {
137+
Ok(json) => {
138+
if let Err(e) = parse_owned_jsonb_with_buf(json.as_bytes(), &mut builder.data) {
142139
ctx.set_error(builder.len(), e.to_string());
143140
}
144-
},
141+
}
145142
Err(e) => {
146143
ctx.set_error(builder.len(), e.to_string());
147144
}

0 commit comments

Comments
 (0)