From 5dedf29c7938ea8f70f3b7a93f7fa0d09d231c48 Mon Sep 17 00:00:00 2001 From: Jack Kleeman Date: Tue, 19 Aug 2025 11:50:06 +0100 Subject: [PATCH 1/4] Prepare for datafusion v50 These derives will be needed for udfs going forward. --- src/common_macros.rs | 2 +- src/common_union.rs | 2 +- src/json_as_text.rs | 2 +- src/json_contains.rs | 2 +- src/json_get.rs | 2 +- src/json_get_array.rs | 2 +- src/json_get_bool.rs | 2 +- src/json_get_float.rs | 2 +- src/json_get_int.rs | 2 +- src/json_get_json.rs | 2 +- src/json_get_str.rs | 2 +- src/json_length.rs | 2 +- src/json_object_keys.rs | 2 +- tests/main.rs | 1 + 14 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/common_macros.rs b/src/common_macros.rs index a2c6cd0..bc4c1b3 100644 --- a/src/common_macros.rs +++ b/src/common_macros.rs @@ -30,7 +30,7 @@ macro_rules! make_udf_function { static [< STATIC_ $expr_fn_name:upper >]: std::sync::OnceLock> = std::sync::OnceLock::new(); - /// ScalarFunction that returns a [`ScalarUDF`] for [`$udf_impl`] + /// `ScalarFunction` that returns a [`ScalarUDF`] for [`$udf_impl`] /// /// [`ScalarUDF`]: datafusion::logical_expr::ScalarUDF pub fn [< $expr_fn_name _udf >]() -> std::sync::Arc { diff --git a/src/common_union.rs b/src/common_union.rs index b462136..217042b 100644 --- a/src/common_union.rs +++ b/src/common_union.rs @@ -257,7 +257,7 @@ impl JsonUnionEncoder { /// /// Panics if the idx is outside the union values or an invalid type id exists in the union. #[must_use] - pub fn get_value(&self, idx: usize) -> JsonUnionValue { + pub fn get_value(&self, idx: usize) -> JsonUnionValue<'_> { let type_id = self.type_ids[idx]; match type_id { TYPE_ID_NULL => JsonUnionValue::JsonNull, diff --git a/src/json_as_text.rs b/src/json_as_text.rs index 4c3fdba..bfb4cb1 100644 --- a/src/json_as_text.rs +++ b/src/json_as_text.rs @@ -17,7 +17,7 @@ make_udf_function!( r#"Get any value from a JSON string by its "path", represented as a string"# ); -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash)] pub(super) struct JsonAsText { signature: Signature, aliases: [String; 1], diff --git a/src/json_contains.rs b/src/json_contains.rs index a1ab056..01b1120 100644 --- a/src/json_contains.rs +++ b/src/json_contains.rs @@ -17,7 +17,7 @@ make_udf_function!( r#"Does the key/index exist within the JSON value as the specified "path"?"# ); -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash)] pub(super) struct JsonContains { signature: Signature, aliases: [String; 1], diff --git a/src/json_get.rs b/src/json_get.rs index 097bae2..4c7f9bc 100644 --- a/src/json_get.rs +++ b/src/json_get.rs @@ -23,7 +23,7 @@ make_udf_function!( // build_typed_get!(JsonGet, "json_get", Union, Float64Array, jiter_json_get_float); -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash)] pub(super) struct JsonGet { signature: Signature, aliases: [String; 1], diff --git a/src/json_get_array.rs b/src/json_get_array.rs index 0508a51..88c680d 100644 --- a/src/json_get_array.rs +++ b/src/json_get_array.rs @@ -17,7 +17,7 @@ make_udf_function!( r#"Get an arrow array from a JSON string by its "path""# ); -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash)] pub(super) struct JsonGetArray { signature: Signature, aliases: [String; 1], diff --git a/src/json_get_bool.rs b/src/json_get_bool.rs index 4cb4560..17be9b0 100644 --- a/src/json_get_bool.rs +++ b/src/json_get_bool.rs @@ -16,7 +16,7 @@ make_udf_function!( r#"Get an boolean value from a JSON string by its "path""# ); -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash)] pub(super) struct JsonGetBool { signature: Signature, aliases: [String; 1], diff --git a/src/json_get_float.rs b/src/json_get_float.rs index 34e0247..aff252b 100644 --- a/src/json_get_float.rs +++ b/src/json_get_float.rs @@ -17,7 +17,7 @@ make_udf_function!( r#"Get a float value from a JSON string by its "path""# ); -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash)] pub(super) struct JsonGetFloat { signature: Signature, aliases: [String; 1], diff --git a/src/json_get_int.rs b/src/json_get_int.rs index 26d24ec..5788957 100644 --- a/src/json_get_int.rs +++ b/src/json_get_int.rs @@ -17,7 +17,7 @@ make_udf_function!( r#"Get an integer value from a JSON string by its "path""# ); -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash)] pub(super) struct JsonGetInt { signature: Signature, aliases: [String; 1], diff --git a/src/json_get_json.rs b/src/json_get_json.rs index a8c6477..5907b2b 100644 --- a/src/json_get_json.rs +++ b/src/json_get_json.rs @@ -15,7 +15,7 @@ make_udf_function!( r#"Get a nested raw JSON string from a JSON string by its "path""# ); -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash)] pub(super) struct JsonGetJson { signature: Signature, aliases: [String; 1], diff --git a/src/json_get_str.rs b/src/json_get_str.rs index e8ee200..658f1e3 100644 --- a/src/json_get_str.rs +++ b/src/json_get_str.rs @@ -16,7 +16,7 @@ make_udf_function!( r#"Get a string value from a JSON string by its "path""# ); -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash)] pub(super) struct JsonGetStr { signature: Signature, aliases: [String; 1], diff --git a/src/json_length.rs b/src/json_length.rs index f478854..8bd657d 100644 --- a/src/json_length.rs +++ b/src/json_length.rs @@ -17,7 +17,7 @@ make_udf_function!( r"Get the length of the array or object at the given path." ); -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash)] pub(super) struct JsonLength { signature: Signature, aliases: [String; 2], diff --git a/src/json_object_keys.rs b/src/json_object_keys.rs index a07cec0..8ea040d 100644 --- a/src/json_object_keys.rs +++ b/src/json_object_keys.rs @@ -17,7 +17,7 @@ make_udf_function!( r"Get the keys of a JSON object as an array." ); -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash)] pub(super) struct JsonObjectKeys { signature: Signature, aliases: [String; 2], diff --git a/tests/main.rs b/tests/main.rs index 35a24a4..1d47b3a 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -1573,6 +1573,7 @@ fn check_for_null_dictionary_values(array: &dyn Array) { } /// Test that we don't output nulls in dictionary values. +#[allow(clippy::doc_markdown)] /// This can cause issues with arrow-rs and DataFusion; they expect nulls to be in keys. #[tokio::test] async fn test_dict_get_no_null_values() { From 5c4d046966d428392deb0559b013a05d4a531faa Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:07:43 -0400 Subject: [PATCH 2/4] update to df 50 --- Cargo.toml | 4 ++-- tests/main.rs | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2180949..1e81d1d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,14 +11,14 @@ repository = "https://github.com/datafusion-contrib/datafusion-functions-json/" rust-version = "1.85.1" [dependencies] -datafusion = { version = "49", default-features = false } +datafusion = { version = "50", default-features = false } jiter = "0.10" log = "0.4" paste = "1" [dev-dependencies] codspeed-criterion-compat = "2.6" -datafusion = { version = "49", default-features = false, features = [ +datafusion = { version = "50", default-features = false, features = [ "nested_expressions", ] } tokio = { version = "1.43", features = ["full"] } diff --git a/tests/main.rs b/tests/main.rs index 1d47b3a..ba74390 100644 --- a/tests/main.rs +++ b/tests/main.rs @@ -6,6 +6,7 @@ use datafusion::arrow::datatypes::{Field, Int64Type, Int8Type, Schema}; use datafusion::arrow::{array::StringDictionaryBuilder, datatypes::DataType}; use datafusion::assert_batches_eq; use datafusion::common::ScalarValue; +use datafusion::config::ConfigOptions; use datafusion::logical_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion::prelude::SessionContext; use datafusion_functions_json::udfs::json_get_str_udf; @@ -600,6 +601,7 @@ fn test_json_get_utf8() { Field::new("ret_field", DataType::Utf8, false) .with_metadata(HashMap::from_iter(vec![("is_json".to_string(), "true".to_string())])), ), + config_options: Arc::new(ConfigOptions::default()), }) .unwrap() else { @@ -633,6 +635,7 @@ fn test_json_get_large_utf8() { Field::new("ret_field", DataType::Utf8, false) .with_metadata(HashMap::from_iter(vec![("is_json".to_string(), "true".to_string())])), ), + config_options: Arc::new(ConfigOptions::default()), }) .unwrap() else { From f5c4144601c2dbaa272401439b9fd0b884a0f51b Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:16:40 -0400 Subject: [PATCH 3/4] fix more --- Cargo.toml | 2 +- benches/main.rs | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1e81d1d..c434255 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ license = "Apache-2.0" keywords = ["datafusion", "JSON", "SQL"] categories = ["database-implementations", "parsing"] repository = "https://github.com/datafusion-contrib/datafusion-functions-json/" -rust-version = "1.85.1" +rust-version = "1.86.0" [dependencies] datafusion = { version = "50", default-features = false } diff --git a/benches/main.rs b/benches/main.rs index 77d8eae..521e0c6 100644 --- a/benches/main.rs +++ b/benches/main.rs @@ -33,6 +33,7 @@ fn bench_json_contains(b: &mut Bencher) { number_rows: 1, arg_fields: arg_fields.clone(), return_field: return_field.clone(), + config_options: Arc::new(datafusion::config::ConfigOptions::default()), }) .unwrap() }); @@ -63,6 +64,7 @@ fn bench_json_get_str_scalar(b: &mut Bencher) { arg_fields: arg_fields.clone(), number_rows: 1, return_field: return_field.clone(), + config_options: Arc::new(datafusion::config::ConfigOptions::default()), }) .unwrap(); }); @@ -94,6 +96,7 @@ fn bench_json_get_str_array(b: &mut Bencher) { arg_fields: arg_fields.clone(), number_rows: 1, return_field: return_field.clone(), + config_options: Arc::new(datafusion::config::ConfigOptions::default()), }) .unwrap(); }); @@ -125,6 +128,7 @@ fn bench_json_get_str_view_array(b: &mut Bencher) { arg_fields: arg_fields.clone(), number_rows: 1, return_field: return_field.clone(), + config_options: Arc::new(datafusion::config::ConfigOptions::default()), }) .unwrap(); }); From 0bde5c0f7d55d817649e4582da3df295786cc6e1 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:17:36 -0400 Subject: [PATCH 4/4] bump version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index c434255..2d61d09 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "datafusion-functions-json" -version = "0.49.1" +version = "0.50.0" edition = "2021" description = "JSON functions for DataFusion" readme = "README.md"