Skip to content

Commit c94e670

Browse files
committed
feat: create UdfCodeFormatter trait
This commit does a few things. First, it splits off into it's own module. It also shuffles around some test utils as part of that. Most importantly though, it defines and implements a UdfCodeFormatter trait for formatting various programming languages when parsing & registering UDFs.
1 parent 9e540ae commit c94e670

File tree

28 files changed

+294
-77
lines changed

28 files changed

+294
-77
lines changed

Cargo.lock

Lines changed: 14 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ members = [
66
"guests/python",
77
"guests/rust",
88
"host",
9+
"query",
910
]
1011

1112
[workspace.package]
@@ -25,6 +26,7 @@ datafusion-udf-wasm-arrow2bytes = { path = "arrow2bytes", version = "0.1.0" }
2526
datafusion-udf-wasm-bundle = { path = "guests/bundle", version = "0.1.0" }
2627
datafusion-udf-wasm-guest = { path = "guests/rust", version = "0.1.0" }
2728
datafusion-udf-wasm-python = { path = "guests/python", version = "0.1.0" }
29+
datafusion-udf-wasm-query = { path = "query", version = "0.1.0" }
2830
http = { version = "1.3.1", default-features = false }
2931
hyper = { version = "1.7", default-features = false }
3032
pyo3 = { version = "0.27.1", default-features = false, features = ["macros"] }

host/Cargo.toml

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,17 @@ license.workspace = true
77
[dependencies]
88
anyhow.workspace = true
99
arrow.workspace = true
10-
datafusion.workspace = true
1110
datafusion-common.workspace = true
1211
datafusion-expr.workspace = true
13-
datafusion-sql.workspace = true
1412
datafusion-udf-wasm-arrow2bytes.workspace = true
13+
datafusion-udf-wasm-bundle = { workspace = true, features = [
14+
"example",
15+
"python"
16+
] }
1517
http.workspace = true
1618
hyper.workspace = true
1719
rand = { version = "0.9" }
1820
siphasher = { version = "1", default-features = false }
19-
sqlparser.workspace = true
2021
tar.workspace = true
2122
tokio = { workspace = true, features = ["rt", "rt-multi-thread", "sync"] }
2223
wasmtime.workspace = true
@@ -25,10 +26,6 @@ wasmtime-wasi-http.workspace = true
2526
wasmtime-wasi-io.workspace = true
2627

2728
[dev-dependencies]
28-
datafusion-udf-wasm-bundle = { workspace = true, features = [
29-
"example",
30-
"python"
31-
] }
3229
insta = "1.43.2"
3330
tokio = { workspace = true, features = ["fs", "macros"] }
3431
wiremock = "0.6.5"

host/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ mod conversion;
4242
mod error;
4343
pub mod http;
4444
mod linker;
45+
/// Utilities for testing.
46+
pub mod test_utils;
4547
mod tokio_helpers;
46-
pub mod udf_query;
4748
mod vfs;
4849

4950
/// State of the WASM payload.

host/src/test_utils/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/// Utilities for testing.
2+
pub mod python;

host/tests/integration_tests/python/test_utils.rs renamed to host/src/test_utils/python.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1+
use crate::{WasmComponentPrecompiled, WasmScalarUdf};
12
use datafusion_common::DataFusionError;
2-
use datafusion_udf_wasm_host::{WasmComponentPrecompiled, WasmScalarUdf};
33
use tokio::sync::OnceCell;
44

5+
/// Static precompiled Python WASM component for tests
56
static COMPONENT: OnceCell<WasmComponentPrecompiled> = OnceCell::const_new();
67

7-
pub(crate) async fn python_component() -> &'static WasmComponentPrecompiled {
8+
/// Returns a static reference to the precompiled Python WASM component.
9+
pub async fn python_component() -> &'static WasmComponentPrecompiled {
810
COMPONENT
911
.get_or_init(async || {
1012
WasmComponentPrecompiled::new(datafusion_udf_wasm_bundle::BIN_PYTHON.into())
@@ -14,13 +16,15 @@ pub(crate) async fn python_component() -> &'static WasmComponentPrecompiled {
1416
.await
1517
}
1618

17-
pub(crate) async fn python_scalar_udfs(code: &str) -> Result<Vec<WasmScalarUdf>, DataFusionError> {
19+
/// Compiles the provided Python UDF code into a list of WasmScalarUdf instances.
20+
pub async fn python_scalar_udfs(code: &str) -> Result<Vec<WasmScalarUdf>, DataFusionError> {
1821
let component = python_component().await;
1922

2023
WasmScalarUdf::new(component, &Default::default(), code.to_owned()).await
2124
}
2225

23-
pub(crate) async fn python_scalar_udf(code: &str) -> Result<WasmScalarUdf, DataFusionError> {
26+
/// Compiles the provided Python UDF code into a single WasmScalarUdf instance.
27+
pub async fn python_scalar_udf(code: &str) -> Result<WasmScalarUdf, DataFusionError> {
2428
let udfs = python_scalar_udfs(code).await?;
2529
assert_eq!(udfs.len(), 1);
2630
Ok(udfs.into_iter().next().expect("just checked len"))
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
mod python;
22
mod rust;
33
mod test_utils;
4-
mod udf_query;

host/tests/integration_tests/python/argument_forms.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@ use arrow::{
88
datatypes::{DataType, Field},
99
};
1010
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
11+
use datafusion_udf_wasm_host::test_utils::python::python_scalar_udf;
1112

12-
use crate::integration_tests::{
13-
python::test_utils::python_scalar_udf, test_utils::ColumnarValueExt,
14-
};
13+
use crate::integration_tests::test_utils::ColumnarValueExt;
1514

1615
#[tokio::test(flavor = "multi_thread")]
1716
async fn test_positional_or_keyword() {

host/tests/integration_tests/python/examples.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,9 @@ use arrow::{
66
};
77
use datafusion_common::ScalarValue;
88
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
9+
use datafusion_udf_wasm_host::test_utils::python::python_scalar_udf;
910

10-
use crate::integration_tests::{
11-
python::test_utils::python_scalar_udf, test_utils::ColumnarValueExt,
12-
};
11+
use crate::integration_tests::test_utils::ColumnarValueExt;
1312

1413
#[tokio::test(flavor = "multi_thread")]
1514
async fn test_add_one() {

host/tests/integration_tests/python/inspection/errors.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
use datafusion_common::DataFusionError;
2-
3-
use crate::integration_tests::python::test_utils::python_scalar_udfs;
2+
use datafusion_udf_wasm_host::test_utils::python::python_scalar_udfs;
43

54
#[tokio::test(flavor = "multi_thread")]
65
async fn test_invalid_syntax() {

0 commit comments

Comments
 (0)