Skip to content

Commit bd9e667

Browse files
authored
Merge pull request #100 from influxdata/crepererum/bundle_guests
feat: bundle guests
2 parents 333d535 + e04f91b commit bd9e667

File tree

10 files changed

+296
-22
lines changed

10 files changed

+296
-22
lines changed

Cargo.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ license = "MIT OR Apache-2.0"
77
[workspace]
88
members = [
99
"arrow2bytes",
10+
"guests/bundle",
1011
"guests/python",
1112
"guests/rust",
1213
"host",
@@ -19,7 +20,9 @@ chrono = { version = "0.4.42", default-features = false }
1920
datafusion-common = { version = "49.0.1", default-features = false }
2021
datafusion-expr = { version = "49.0.1", default-features = false }
2122
datafusion-udf-wasm-arrow2bytes = { path = "arrow2bytes", version = "0.1.0" }
23+
datafusion-udf-wasm-bundle = { path = "guests/bundle", version = "0.1.0" }
2224
datafusion-udf-wasm-guest = { path = "guests/rust", version = "0.1.0" }
25+
datafusion-udf-wasm-python = { path = "guests/python", version = "0.1.0" }
2326
tokio = { version = "1.48.0", default-features = false }
2427
pyo3 = { version = "0.26.0", default-features = false }
2528
tar = { version = "0.4.44", default-features = false }

Justfile

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,37 +8,37 @@ default:
88
check-rust-build: guests::rust::check-build guests::python::check-build
99

1010
# check Rust files via `cargo check`
11-
check-rust-check:
11+
check-rust-check $JUSTCHECK="1":
1212
@echo ::group::check-rust-check
1313
cargo check --workspace --all-features
1414
@echo ::endgroup::
1515

1616
# check Rust files via `cargo clippy`
17-
check-rust-clippy:
17+
check-rust-clippy $JUSTCHECK="1":
1818
@echo ::group::check-rust-clippy
1919
cargo clippy --all-features --all-targets --workspace -- -D warnings
2020
@echo ::endgroup::
2121

2222
# check Rust formatting
23-
check-rust-fmt:
23+
check-rust-fmt $JUSTCHECK="1":
2424
@echo ::group::check-rust-fmt
2525
cargo fmt --all -- --check
2626
@echo ::endgroup::
2727

2828
# test Rust code
29-
check-rust-test $RUST_BACKTRACE="1": check-rust-build
29+
check-rust-test $RUST_BACKTRACE="1":
3030
@echo ::group::check-rust-test
3131
cargo test --all-features --workspace
3232
@echo ::endgroup::
3333

3434
# build Rust docs
35-
check-rust-doc:
35+
check-rust-doc $JUSTCHECK="1":
3636
@echo ::group::check-rust-doc
3737
cargo doc --document-private-items --all-features --workspace
3838
@echo ::endgroup::
3939

4040
# dry-run Rust benchmarks
41-
check-rust-bench: check-rust-build
41+
check-rust-bench:
4242
@echo ::group::check-rust-bench
4343
cargo bench --profile=dev --all-features --workspace -- --test
4444
@echo ::endgroup::

guests/bundle/Cargo.toml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
[package]
2+
name = "datafusion-udf-wasm-bundle"
3+
version.workspace = true
4+
authors.workspace = true
5+
edition.workspace = true
6+
license.workspace = true
7+
8+
[lints]
9+
workspace = true
10+
11+
[features]
12+
example = ["dep:datafusion-udf-wasm-guest"]
13+
python = ["dep:datafusion-udf-wasm-python"]
14+
15+
[build-dependencies]
16+
# these need to be marked as build dependencies so the build script reruns whenever they change
17+
datafusion-udf-wasm-guest = { workspace = true, optional = true }
18+
datafusion-udf-wasm-python = { workspace = true, optional = true }
19+
20+
# the actual build-time dependencies
21+
serde_json = "1.0.145"

guests/bundle/build.rs

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
//! Build script.
2+
#![allow(unused_crate_dependencies)]
3+
4+
use std::{
5+
collections::HashMap,
6+
path::{Path, PathBuf},
7+
process::{Command, Stdio},
8+
str::FromStr,
9+
};
10+
11+
fn main() {
12+
let profile: Profile = std::env::var("PROFILE").unwrap().parse().unwrap();
13+
let package_locations = package_locations();
14+
15+
// does it look like we are running under clippy or rust-analyzer
16+
// This code was inspired by
17+
// https://github.com/bytecodealliance/componentize-py/blob/139d0ed85f09095e0a4cfa112e97ce589371315e/build.rs#L35-L42
18+
//
19+
// This doesn't detect the following things though:
20+
// - `cargo check`: https://github.com/rust-lang/cargo/issues/4001
21+
// - `cargo doc`: https://github.com/rust-lang/cargo/issues/8811
22+
println!("cargo::rerun-if-env-changed=JUSTCHECK");
23+
let stub = matches!(
24+
std::env::var("CARGO_CFG_FEATURE").as_deref(),
25+
Ok("cargo-clippy")
26+
) || std::env::var("CLIPPY_ARGS").is_ok()
27+
|| std::env::var("CARGO_EXPAND_NO_RUN_NIGHTLY").is_ok()
28+
|| std::env::var("DOCS_RS").is_ok()
29+
|| std::env::var("JUSTCHECK").is_ok();
30+
31+
for feature in FEATURES {
32+
println!("processing {}", feature.name);
33+
feature.build_or_stub(stub, profile, &package_locations);
34+
}
35+
}
36+
37+
/// Get locations for all packages in the dependency tree.
38+
fn package_locations() -> HashMap<String, PathBuf> {
39+
let json = Command::new(std::env::var("CARGO").unwrap())
40+
.current_dir(std::env::var_os("CARGO_MANIFEST_DIR").unwrap())
41+
.arg("metadata")
42+
.run();
43+
44+
let json: serde_json::Value = serde_json::from_str(&json).expect("valid json");
45+
46+
json.as_object()
47+
.unwrap()
48+
.get("packages")
49+
.unwrap()
50+
.as_array()
51+
.unwrap()
52+
.iter()
53+
.map(|val| {
54+
let package = val.as_object().unwrap();
55+
let name = package.get("name").unwrap().as_str().unwrap().to_owned();
56+
let manifest_path =
57+
PathBuf::from(package.get("manifest_path").unwrap().as_str().unwrap())
58+
.parent()
59+
.unwrap()
60+
.to_owned();
61+
(name, manifest_path)
62+
})
63+
.collect::<HashMap<_, _>>()
64+
}
65+
66+
/// Extension trait for [`Command`].
trait CommandExt {
    /// Sanitize environment variables.
    ///
    /// Clears the environment of the child process and then re-adds every variable of the
    /// current process, except those whose name starts with `CARGO` or `RUST` and those
    /// whose name is not valid Unicode.
    fn sanitize_env(&mut self) -> &mut Self;

    /// Run command, check status, and convert output to a string.
    ///
    /// Only stdout is captured; the remaining stdio configuration of the command is left
    /// untouched.
    ///
    /// # Panics
    /// Panics if the command cannot be spawned or awaited, exits unsuccessfully, or writes
    /// non-UTF-8 data to stdout.
    fn run(&mut self) -> String;
}

impl CommandExt for Command {
    fn sanitize_env(&mut self) -> &mut Self {
        self.env_clear();

        // Code inspired by
        // https://github.com/bytecodealliance/componentize-py/blob/139d0ed85f09095e0a4cfa112e97ce589371315e/build.rs#L117-L125
        for (k, v) in std::env::vars_os() {
            let Ok(k) = k.into_string() else {
                continue;
            };
            if k.starts_with("CARGO") || k.starts_with("RUST") {
                continue;
            }
            self.env(k, v);
        }

        self
    }

    fn run(&mut self) -> String {
        self.stdout(Stdio::piped());

        let child = match self.spawn() {
            Ok(child) => child,
            Err(e) => panic!("failed to spawn {self:?}: {e}"),
        };
        let output = match child.wait_with_output() {
            Ok(output) => output,
            Err(e) => panic!("failed to wait for {self:?}: {e}"),
        };

        // name the command and its exit status, a bare assertion failure is hard to debug
        assert!(
            output.status.success(),
            "command {self:?} failed: {}",
            output.status,
        );
        String::from_utf8(output.stdout).expect("valid UTF-8")
    }
}
106+
107+
/// Cargo profiles this build script knows how to handle.
#[derive(Debug, Clone, Copy)]
enum Profile {
    /// Debug/dev.
    Debug,

    /// Release.
    Release,
}

impl Profile {
    /// Static, lowercase name of the profile (`"debug"` or `"release"`).
    fn as_str(&self) -> &'static str {
        match self {
            Self::Release => "release",
            Self::Debug => "debug",
        }
    }
}

impl FromStr for Profile {
    /// The rejected input, returned verbatim.
    type Err = String;

    /// Parse a profile from its lowercase name as produced by [`Profile::as_str`].
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        [Self::Debug, Self::Release]
            .into_iter()
            .find(|candidate| candidate.as_str() == s)
            .ok_or_else(|| s.to_owned())
    }
}

impl std::fmt::Display for Profile {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` honours width/alignment flags exactly like formatting a `str` does
        f.pad(self.as_str())
    }
}
144+
145+
/// Feature description.
146+
struct Feature {
147+
/// Lowercase feature name.
148+
name: &'static str,
149+
150+
/// Package that contains the feature code.
151+
package: &'static str,
152+
153+
/// `just` command prefix that compiles the feature.
154+
///
155+
/// This will call `just prefix{profile}` within the package directory.
156+
just_cmd_prefix: &'static str,
157+
158+
/// Path components to file in target directory.
159+
///
160+
/// So `["foo", "bar.bin"]` will resolve to `CARGO_TARGET_DIR/wasm32-wasip2/foo/bar.bin`.
161+
just_out_file: &'static [&'static str],
162+
}
163+
164+
impl Feature {
165+
/// Build or stub feature.
166+
fn build_or_stub(
167+
&self,
168+
stub: bool,
169+
profile: Profile,
170+
package_locations: &HashMap<String, PathBuf>,
171+
) {
172+
let Self {
173+
name,
174+
package,
175+
just_cmd_prefix,
176+
just_out_file,
177+
} = self;
178+
179+
let name_upper = name.to_uppercase();
180+
if std::env::var_os(format!("CARGO_FEATURE_{name_upper}")).is_none() {
181+
// feature not selected
182+
return;
183+
}
184+
185+
let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
186+
187+
let out_file = if stub {
188+
let out_file = out_dir.join(format!("{name}.wasm"));
189+
// write empty stub file
190+
std::fs::write(&out_file, b"").unwrap();
191+
out_file
192+
} else {
193+
let target_dir = out_dir.join(name);
194+
195+
just_build(
196+
package_locations.get(*package).unwrap(),
197+
&format!("{just_cmd_prefix}{profile}"),
198+
&target_dir,
199+
);
200+
201+
just_out_file.iter().fold(
202+
target_dir.join("wasm32-wasip2").join(profile.as_str()),
203+
|path, part| path.join(part),
204+
)
205+
};
206+
207+
println!(
208+
"cargo::rustc-env=BIN_PATH_{name_upper}={}",
209+
out_file.display(),
210+
);
211+
}
212+
}
213+
214+
/// Build a target with `just`.
215+
fn just_build(cwd: &Path, just_cmd: &str, cargo_target_dir: &Path) {
216+
Command::new("just")
217+
.current_dir(cwd)
218+
.arg(just_cmd)
219+
.sanitize_env()
220+
.env("CARGO_TARGET_DIR", cargo_target_dir.as_os_str())
221+
.run();
222+
}
223+
224+
/// All supported features.
225+
///
226+
/// This must be in-sync with the feature list in `Cargo.toml` and the imports in `src/lib.rs`.
227+
const FEATURES: &[Feature] = &[
228+
Feature {
229+
name: "example",
230+
package: "datafusion-udf-wasm-guest",
231+
just_cmd_prefix: "build-add-one-",
232+
just_out_file: &["examples", "add_one.wasm"],
233+
},
234+
Feature {
235+
name: "python",
236+
package: "datafusion-udf-wasm-python",
237+
just_cmd_prefix: "",
238+
just_out_file: &["datafusion_udf_wasm_python.wasm"],
239+
},
240+
];

guests/bundle/src/lib.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
//! Bundles guests as pre-compiled WASM bytecode.
2+
3+
/// Bytes of the "add-one" example guest.
///
/// The file is located via the `BIN_PATH_EXAMPLE` environment variable at compile time.
#[cfg(feature = "example")]
pub static BIN_EXAMPLE: &[u8] = include_bytes!(env!("BIN_PATH_EXAMPLE"));

/// Bytes of the Python UDF guest.
///
/// The file is located via the `BIN_PATH_PYTHON` environment variable at compile time.
#[cfg(feature = "python")]
pub static BIN_PYTHON: &[u8] = include_bytes!(env!("BIN_PATH_PYTHON"));

host/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,6 @@ wasmtime-wasi.workspace = true
2121
wasmtime-wasi-http.workspace = true
2222

2323
[dev-dependencies]
24+
datafusion-udf-wasm-bundle = { workspace = true, features = ["example", "python"] }
2425
insta = "1.43.2"
2526
tokio = { workspace = true, features = ["fs", "macros"] }

host/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ use crate::{error::WasmToDataFusionResultExt, tokio_helpers::blocking_io};
2626

2727
// unused-crate-dependencies false positives
2828
#[cfg(test)]
29+
use datafusion_udf_wasm_bundle as _;
30+
#[cfg(test)]
2931
use insta as _;
3032

3133
mod bindings;

host/tests/integration_tests/python/test_utils.rs

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,7 @@ static COMPONENT: OnceCell<WasmComponentPrecompiled> = OnceCell::const_new();
77
async fn python_component() -> &'static WasmComponentPrecompiled {
88
COMPONENT
99
.get_or_init(async || {
10-
let wasm_binary = tokio::fs::read(format!(
11-
"{}/../target/wasm32-wasip2/debug/datafusion_udf_wasm_python.wasm",
12-
env!("CARGO_MANIFEST_DIR")
13-
))
14-
.await
15-
.unwrap();
16-
17-
WasmComponentPrecompiled::new(wasm_binary.into())
10+
WasmComponentPrecompiled::new(datafusion_udf_wasm_bundle::BIN_PYTHON.into())
1811
.await
1912
.unwrap()
2013
})

0 commit comments

Comments
 (0)