Skip to content

Commit 968838c

Browse files
authored
Merge pull request #162 from influxdata/crepererum/evil-payload
feat: infrastructure for "evil" test payloads
2 parents 0c1dcc7 + 9f9544c commit 968838c

File tree

20 files changed

+534
-2
lines changed

20 files changed

+534
-2
lines changed

Cargo.lock

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ resolver = "3"
33
members = [
44
"arrow2bytes",
55
"guests/bundle",
6+
"guests/evil",
67
"guests/python",
78
"guests/rust",
89
"host",
@@ -24,6 +25,7 @@ datafusion-expr = { version = "49.0.1", default-features = false }
2425
datafusion-sql = { version = "49.0.1", default-features = false }
2526
datafusion-udf-wasm-arrow2bytes = { path = "arrow2bytes", version = "0.1.0" }
2627
datafusion-udf-wasm-bundle = { path = "guests/bundle", version = "0.1.0" }
28+
datafusion-udf-wasm-evil = { path = "guests/evil", version = "0.1.0" }
2729
datafusion-udf-wasm-guest = { path = "guests/rust", version = "0.1.0" }
2830
datafusion-udf-wasm-host = { path = "host", version = "0.1.0" }
2931
datafusion-udf-wasm-python = { path = "guests/python", version = "0.1.0" }
@@ -33,6 +35,7 @@ hyper = { version = "1.8", default-features = false }
3335
insta = { version = "1.43.2", "default-features" = false }
3436
log = { version = "0.4.28", default-features = false }
3537
pyo3 = { version = "0.27.1", default-features = false, features = ["macros"] }
38+
regex = { version = "1", default-features = false }
3639
sqlparser = { version = "0.55.0", default-features = false, features = [
3740
"std",
3841
"visitor"

guests/Justfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1+
mod evil
12
mod python
23
mod rust

guests/bundle/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@ license.workspace = true
66

77
[build-dependencies]
88
# these need to be marked as build dependencies so the build script reruns whenever they change
9+
datafusion-udf-wasm-evil = { workspace = true, optional = true }
910
datafusion-udf-wasm-guest = { workspace = true, optional = true }
1011
datafusion-udf-wasm-python = { workspace = true, optional = true }
1112
# the actual build-time dependencies
1213
serde_json = "1.0.145"
1314

1415
[features]
16+
evil = ["dep:datafusion-udf-wasm-evil"]
1517
example = ["dep:datafusion-udf-wasm-guest"]
1618
python = ["dep:datafusion-udf-wasm-python"]
1719

guests/bundle/build.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,15 @@ fn just_build(cwd: &Path, just_cmd: &str, cargo_target_dir: &Path) {
287287
///
288288
/// This must be in-sync with the feature list in `Cargo.toml`.
289289
const FEATURES: &[Feature] = &[
290+
Feature {
291+
name: "evil",
292+
package: "datafusion-udf-wasm-evil",
293+
just_cmds: &[JustCmd {
294+
artifact_type: ArtifactType::Lib,
295+
const_name: "EVIL",
296+
doc: "Evil payloads.",
297+
}],
298+
},
290299
Feature {
291300
name: "example",
292301
package: "datafusion-udf-wasm-guest",

guests/evil/Cargo.toml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[package]
2+
name = "datafusion-udf-wasm-evil"
3+
version.workspace = true
4+
edition.workspace = true
5+
license.workspace = true
6+
7+
[lib]
8+
crate-type = ["cdylib"]
9+
10+
[dependencies]
11+
arrow.workspace = true
12+
datafusion-common.workspace = true
13+
datafusion-expr.workspace = true
14+
datafusion-udf-wasm-guest.workspace = true
15+
tar.workspace = true
16+
17+
[lints]
18+
workspace = true

guests/evil/Justfile

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[private]
2+
build profile:
3+
@echo ::group::guests::evil::build-{{profile}}
4+
cargo build --target=wasm32-wasip2 --profile={{replace(profile, "debug", "dev")}}
5+
@echo ::endgroup::
6+
7+
# build library in debug mode
8+
build-debug: (build "debug")
9+
10+
# build library in release mode
11+
build-release: (build "release")
12+
13+
# checks build
14+
check-build: build-debug

guests/evil/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Evil Test Payloads
2+
3+
These are payloads that try to stress the WASM sandbox. They are NOT meant to be used in production.
4+
5+
For efficiency, we generate a single WASM binary; the payload to run is selected (multiplexed) at runtime via the `EVIL` environment variable.

guests/evil/src/lib.rs

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
//! Library that multiplexes different evil payloads.
2+
//!
3+
//! We need this multiplexing because evil payloads may act long before the actual UDFs are available.
4+
use std::sync::Arc;
5+
6+
use datafusion_common::Result as DataFusionResult;
7+
use datafusion_expr::ScalarUDFImpl;
8+
use datafusion_udf_wasm_guest::export;
9+
10+
mod root;
11+
mod runtime;
12+
13+
/// Method that returns the root filesystem.
14+
type RootFn = Box<dyn Fn() -> Option<Vec<u8>>>;
15+
16+
/// Method that enumerates UDFs.
17+
type UdfsFn = Box<dyn Fn(String) -> DataFusionResult<Vec<Arc<dyn ScalarUDFImpl>>>>;
18+
19+
/// An evil payload.
20+
struct Evil {
21+
/// Root file system.
22+
root: RootFn,
23+
24+
/// Returns UDFs.
25+
udfs: UdfsFn,
26+
}
27+
28+
impl Evil {
29+
/// Get evil, multiplexed by env.
30+
fn get() -> Self {
31+
match std::env::var("EVIL").expect("evil specified").as_str() {
32+
"root::many_files" => Self {
33+
root: Box::new(root::many_files::root),
34+
udfs: Box::new(root::many_files::udfs),
35+
},
36+
"runtime" => Self {
37+
root: Box::new(runtime::root),
38+
udfs: Box::new(runtime::udfs),
39+
},
40+
other => panic!("unknown evil: {other}"),
41+
}
42+
}
43+
}
44+
45+
/// Return root file system.
46+
fn root() -> Option<Vec<u8>> {
47+
(Evil::get().root)()
48+
}
49+
50+
/// Returns our evil UDFs.
51+
fn udfs(source: String) -> DataFusionResult<Vec<Arc<dyn ScalarUDFImpl>>> {
52+
(Evil::get().udfs)(source)
53+
}
54+
55+
export! {
56+
root_fs_tar: root,
57+
scalar_udfs: udfs,
58+
}

guests/evil/src/root/many_files.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
//! Evil payload that creates A LOT of files.
2+
use std::sync::Arc;
3+
4+
use datafusion_common::Result as DataFusionResult;
5+
use datafusion_expr::ScalarUDFImpl;
6+
7+
/// Return root file system.
8+
#[expect(clippy::unnecessary_wraps, reason = "public API through export! macro")]
9+
pub(crate) fn root() -> Option<Vec<u8>> {
10+
let mut ar = tar::Builder::new(Vec::new());
11+
12+
const LIMIT: u64 = 10_000;
13+
for i in 0..=LIMIT {
14+
let mut header = tar::Header::new_gnu();
15+
header.set_path(i.to_string()).unwrap();
16+
header.set_size(0);
17+
header.set_cksum();
18+
19+
ar.append(&header, b"".as_slice()).unwrap();
20+
}
21+
22+
Some(ar.into_inner().unwrap())
23+
}
24+
25+
/// Returns UDFs.
26+
///
27+
/// The passed `source` is ignored.
28+
#[expect(clippy::unnecessary_wraps, reason = "public API through export! macro")]
29+
pub(crate) fn udfs(_source: String) -> DataFusionResult<Vec<Arc<dyn ScalarUDFImpl>>> {
30+
Ok(vec![])
31+
}

0 commit comments

Comments
 (0)