Skip to content

Commit 7ae11e9

Browse files
committed
feat: virtual file system
1 parent c63aca7 commit 7ae11e9

File tree

6 files changed

+805
-73
lines changed

6 files changed

+805
-73
lines changed

Cargo.lock

Lines changed: 35 additions & 23 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

host/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ datafusion-expr.workspace = true
1616
datafusion-udf-wasm-arrow2bytes.workspace = true
1717
http.workspace = true
1818
hyper.workspace = true
19+
rand = { version = "0.9" }
20+
siphasher = { version = "1", default-features = false }
1921
tar.workspace = true
20-
tempfile.workspace = true
2122
tokio = { workspace = true, features = ["rt", "rt-multi-thread", "sync"] }
2223
wasmtime.workspace = true
2324
wasmtime-wasi.workspace = true

host/src/lib.rs

Lines changed: 29 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,17 @@
22
//!
33
//!
44
//! [DataFusion]: https://datafusion.apache.org/
5-
use std::{any::Any, io::Cursor, ops::DerefMut, sync::Arc};
5+
use std::{any::Any, ops::DerefMut, sync::Arc};
66

77
use arrow::datatypes::DataType;
88
use datafusion_common::{DataFusionError, Result as DataFusionResult};
99
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature};
10-
use tempfile::TempDir;
1110
use tokio::sync::Mutex;
1211
use wasmtime::{
1312
Engine, Store,
1413
component::{Component, ResourceAny},
1514
};
16-
use wasmtime_wasi::{
17-
DirPerms, FilePerms, ResourceTable, WasiCtx, WasiCtxView, WasiView, p2::pipe::MemoryOutputPipe,
18-
};
15+
use wasmtime_wasi::{ResourceTable, WasiCtx, WasiCtxView, WasiView, p2::pipe::MemoryOutputPipe};
1916
use wasmtime_wasi_http::{
2017
HttpResult, WasiHttpCtx, WasiHttpView,
2118
bindings::http::types::ErrorCode as HttpErrorCode,
@@ -25,12 +22,12 @@ use wasmtime_wasi_http::{
2522

2623
use crate::{
2724
bindings::exports::datafusion_udf_wasm::udf::types as wit_types,
28-
error::DataFusionResultExt,
25+
error::{DataFusionResultExt, WasmToDataFusionResultExt},
2926
http::{HttpRequestValidator, RejectAllHttpRequests},
3027
linker::link,
3128
tokio_helpers::async_in_sync_context,
29+
vfs::{VfsCtxView, VfsState, VfsView},
3230
};
33-
use crate::{error::WasmToDataFusionResultExt, tokio_helpers::blocking_io};
3431

3532
// unused-crate-dependencies false positives
3633
#[cfg(test)]
@@ -46,13 +43,14 @@ mod error;
4643
pub mod http;
4744
mod linker;
4845
mod tokio_helpers;
46+
mod vfs;
4947

5048
/// State of the WASM payload.
5149
struct WasmStateImpl {
52-
/// Temporary directory that holds the root filesystem.
50+
/// Virtual filesystem for the WASM payload.
5351
///
54-
/// This filesystem is provided to the payload as read-only.
55-
root: TempDir,
52+
/// This filesystem is provided to the payload in memory with read-write support.
53+
vfs_state: VfsState,
5654

5755
/// A limited buffer for stderr.
5856
///
@@ -75,15 +73,15 @@ struct WasmStateImpl {
7573
impl std::fmt::Debug for WasmStateImpl {
7674
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
7775
let Self {
78-
root,
76+
vfs_state,
7977
stderr,
8078
wasi_ctx: _,
8179
wasi_http_ctx: _,
8280
resource_table,
8381
http_validator,
8482
} = self;
8583
f.debug_struct("WasmStateImpl")
86-
.field("root", root)
84+
.field("vfs_state", vfs_state)
8785
.field("stderr", stderr)
8886
.field("wasi_ctx", &"<WASI_CTX>")
8987
.field("resource_table", resource_table)
@@ -136,6 +134,15 @@ impl WasiHttpView for WasmStateImpl {
136134
}
137135
}
138136

137+
impl VfsView for WasmStateImpl {
138+
fn vfs(&mut self) -> VfsCtxView<'_> {
139+
VfsCtxView {
140+
table: &mut self.resource_table,
141+
vfs_state: &mut self.vfs_state,
142+
}
143+
}
144+
}
145+
139146
/// Pre-compiled WASM component.
140147
///
141148
/// The pre-compilation is stateless and can be used to [create](WasmScalarUdf::new) multiple instances that do not share
@@ -266,21 +273,13 @@ impl WasmScalarUdf {
266273
) -> DataFusionResult<Vec<Self>> {
267274
let WasmComponentPrecompiled { engine, component } = component;
268275

269-
// TODO: we need an in-mem file system for this, see
270-
// - https://github.com/bytecodealliance/wasmtime/issues/8963
271-
// - https://github.com/Timmmm/wasmtime_fs_demo
272-
let root = blocking_io(TempDir::new)
273-
.await
274-
.map_err(DataFusionError::IoError)?;
276+
// Create in-memory VFS
277+
let vfs_state = VfsState::new();
275278

276279
let stderr = MemoryOutputPipe::new(1024);
277-
let wasi_ctx = WasiCtx::builder()
278-
.stderr(stderr.clone())
279-
.preopened_dir(root.path(), "/", DirPerms::READ, FilePerms::READ)
280-
.context("pre-open root dir", None)?
281-
.build();
280+
let wasi_ctx = WasiCtx::builder().stderr(stderr.clone()).build();
282281
let state = WasmStateImpl {
283-
root,
282+
vfs_state,
284283
stderr,
285284
wasi_ctx,
286285
wasi_http_ctx: WasiHttpCtx::new(),
@@ -291,7 +290,7 @@ impl WasmScalarUdf {
291290
.await
292291
.context("link WASM components", None)?;
293292

294-
// fill root FS
293+
// Populate VFS from tar archive
295294
let root_data = bindings
296295
.datafusion_udf_wasm_udf_types()
297296
.call_root_fs_tar(&mut store)
@@ -301,13 +300,11 @@ impl WasmScalarUdf {
301300
Some(&store.data().stderr.contents()),
302301
)?;
303302
if let Some(root_data) = root_data {
304-
let root_path = store.data().root.path().to_owned();
305-
blocking_io(move || {
306-
let mut a = tar::Archive::new(Cursor::new(root_data));
307-
a.unpack(root_path)
308-
})
309-
.await
310-
.map_err(DataFusionError::IoError)?;
303+
store
304+
.data_mut()
305+
.vfs_state
306+
.populate_from_tar(&root_data)
307+
.map_err(DataFusionError::IoError)?;
311308
}
312309

313310
let udf_resources = bindings

host/src/linker.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@ use wasmtime::{
99
};
1010
use wasmtime_wasi::{ResourceTable, WasiView};
1111

12-
use crate::{WasmStateImpl, bindings::Datafusion};
12+
use crate::{
13+
WasmStateImpl,
14+
bindings::Datafusion,
15+
vfs::{HasFs, VfsView},
16+
};
1317

1418
/// Link everything.
1519
pub(crate) async fn link(
@@ -37,7 +41,6 @@ fn link_wasi_p2(linker: &mut Linker<WasmStateImpl>) -> Result<()> {
3741
use wasmtime_wasi::{
3842
cli::{WasiCli, WasiCliView},
3943
clocks::{WasiClocks, WasiClocksView},
40-
filesystem::{WasiFilesystem, WasiFilesystemView},
4144
p2::bindings,
4245
random::WasiRandom,
4346
sockets::{WasiSockets, WasiSocketsView},
@@ -96,13 +99,10 @@ fn link_wasi_p2(linker: &mut Linker<WasmStateImpl>) -> Result<()> {
9699
linker,
97100
WasmStateImpl::cli,
98101
)?;
99-
bindings::filesystem::types::add_to_linker::<WasmStateImpl, WasiFilesystem>(
100-
linker,
101-
WasmStateImpl::filesystem,
102-
)?;
103-
bindings::filesystem::preopens::add_to_linker::<WasmStateImpl, WasiFilesystem>(
102+
bindings::filesystem::types::add_to_linker::<WasmStateImpl, HasFs>(linker, WasmStateImpl::vfs)?;
103+
bindings::filesystem::preopens::add_to_linker::<WasmStateImpl, HasFs>(
104104
linker,
105-
WasmStateImpl::filesystem,
105+
WasmStateImpl::vfs,
106106
)?;
107107
bindings::random::random::add_to_linker::<WasmStateImpl, WasiRandom>(linker, |t| {
108108
t.ctx().ctx.random()

host/src/tokio_helpers.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,3 @@ where
1111
{
1212
tokio::task::block_in_place(move || tokio::runtime::Handle::current().block_on(fut))
1313
}
14-
15-
/// Spawn blocking I/O.
16-
pub(crate) async fn blocking_io<F, T>(f: F) -> std::io::Result<T>
17-
where
18-
F: FnOnce() -> std::io::Result<T> + Send + 'static,
19-
T: Send + 'static,
20-
{
21-
tokio::task::spawn_blocking(f).await?
22-
}

0 commit comments

Comments
 (0)