Skip to content

Commit 4e6e1fb

Browse files
committed
refactor: harden virtual file system
- add more limits - improve path traversal to be cleaner and better tested
1 parent 6f09233 commit 4e6e1fb

File tree

11 files changed

+688
-107
lines changed

11 files changed

+688
-107
lines changed

guests/evil/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ impl Evil {
5252
root: Box::new(root::not_tar::root),
5353
udfs: Box::new(common::udfs_empty),
5454
},
55+
"root::path_long" => Self {
56+
root: Box::new(root::path_long::root),
57+
udfs: Box::new(common::udfs_empty),
58+
},
5559
"root::unsupported_entry" => Self {
5660
root: Box::new(root::unsupported_entry::root),
5761
udfs: Box::new(common::udfs_empty),

guests/evil/src/root/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@
22
pub(crate) mod invalid_entry;
33
pub(crate) mod many_files;
44
pub(crate) mod not_tar;
5+
pub(crate) mod path_long;
56
pub(crate) mod unsupported_entry;

guests/evil/src/root/path_long.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
//! Evil payloads that creates a file with a long path.
2+
3+
/// Return root file system.
4+
#[expect(clippy::unnecessary_wraps, reason = "public API through export! macro")]
5+
pub(crate) fn root() -> Option<Vec<u8>> {
6+
let mut ar = tar::Builder::new(Vec::new());
7+
8+
let limit: usize = std::env::var("limit").unwrap().parse().unwrap();
9+
10+
let mut header = tar::Header::new_gnu();
11+
header
12+
.set_path(std::iter::repeat_n('x', limit + 1).collect::<String>())
13+
.unwrap();
14+
header.set_size(0);
15+
header.set_cksum();
16+
ar.append(&header, b"".as_slice()).unwrap();
17+
18+
Some(ar.into_inner().unwrap())
19+
}

host/src/lib.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,6 @@ use crate::{
4343
#[cfg(test)]
4444
use datafusion_udf_wasm_bundle as _;
4545
#[cfg(test)]
46-
use insta as _;
47-
#[cfg(test)]
4846
use regex as _;
4947
#[cfg(test)]
5048
use wiremock as _;
@@ -356,7 +354,7 @@ impl WasmScalarUdf {
356354
let WasmComponentPrecompiled { engine, component } = component;
357355

358356
// Create in-memory VFS
359-
let vfs_state = VfsState::new(&permissions.vfs);
357+
let vfs_state = VfsState::new(permissions.vfs.clone());
360358

361359
// set up WASI p2 context
362360
let stderr = MemoryOutputPipe::new(1024);

host/src/vfs/error.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
//! Error handling.
2+
3+
use wasmtime_wasi::p2::FsError;
4+
5+
/// Failed allocation error.
6+
#[derive(Debug, Clone)]
7+
#[expect(missing_copy_implementations, reason = "allow later extensions")]
8+
pub struct LimitExceeded {
9+
/// Name of the allocation type/resource.
10+
pub(crate) name: &'static str,
11+
12+
/// Allocation limit.
13+
pub(crate) limit: u64,
14+
15+
/// Current allocation size.
16+
pub(crate) current: u64,
17+
18+
/// Requested/additional allocation.
19+
pub(crate) requested: u64,
20+
}
21+
22+
impl std::fmt::Display for LimitExceeded {
23+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
24+
let Self {
25+
name,
26+
limit,
27+
current,
28+
requested,
29+
} = self;
30+
31+
write!(
32+
f,
33+
"{name} limit reached: limit<={limit} current=={current} requested+={requested}"
34+
)
35+
}
36+
}
37+
38+
impl std::error::Error for LimitExceeded {}
39+
40+
impl From<LimitExceeded> for std::io::Error {
41+
fn from(e: LimitExceeded) -> Self {
42+
Self::new(std::io::ErrorKind::QuotaExceeded, e.to_string())
43+
}
44+
}
45+
46+
impl From<LimitExceeded> for FsError {
47+
fn from(e: LimitExceeded) -> Self {
48+
let e: std::io::Error = e.into();
49+
e.into()
50+
}
51+
}

host/src/vfs/limits.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//! Limit configuration.
2+
3+
/// Limits for virtual filesystems.
4+
///
5+
/// # Depth
6+
/// Note that we do NOT per se limit the depth of the file system, since it is virtually not different from limiting
7+
/// [the number of inodes](Self::inodes). Expensive path traversal is further limited by
8+
/// [`max_path_length`](Self::max_path_length).
9+
#[derive(Debug, Clone)]
10+
#[expect(missing_copy_implementations, reason = "allow later extensions")]
11+
pub struct VfsLimits {
12+
/// Maximum number of inodes.
13+
pub inodes: u64,
14+
15+
/// Maximum number of bytes in size.
16+
pub bytes: u64,
17+
18+
/// Maximum path length, in bytes.
19+
pub max_path_length: u64,
20+
21+
/// Maximum path segment size, in bytes.
22+
///
23+
/// Keep this to a rather small size to prevent super-linear complexity due to string hashing.
24+
pub max_path_segment_size: u64,
25+
}
26+
27+
impl Default for VfsLimits {
28+
fn default() -> Self {
29+
Self {
30+
inodes: 10_000,
31+
// 100MB
32+
bytes: 100 * 1024 * 1024,
33+
max_path_length: 255,
34+
max_path_segment_size: 50,
35+
}
36+
}
37+
}

host/src/vfs.rs renamed to host/src/vfs/mod.rs

Lines changed: 62 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,16 @@ use wasmtime_wasi::{
3737
},
3838
};
3939

40+
pub use crate::vfs::limits::VfsLimits;
41+
use crate::vfs::{
42+
error::LimitExceeded,
43+
path::{PathSegment, PathTraversal},
44+
};
45+
46+
pub mod error;
47+
mod limits;
48+
mod path;
49+
4050
/// Shared version of [`VfsNode`].
4151
type SharedVfsNode = Arc<RwLock<VfsNode>>;
4252

@@ -61,7 +71,7 @@ enum VfsNodeKind {
6171
/// A directory containing child nodes.
6272
Directory {
6373
/// Child nodes indexed by name.
64-
children: HashMap<Box<str>, SharedVfsNode>,
74+
children: HashMap<PathSegment, SharedVfsNode>,
6575
},
6676
}
6777

@@ -142,40 +152,28 @@ impl VfsNode {
142152
}
143153

144154
/// Resolve a path from a starting node to a target node.
145-
fn resolve_path(
146-
root: SharedVfsNode,
155+
fn traverse(
147156
start: SharedVfsNode,
148-
path: &str,
157+
directions: impl Iterator<Item = Result<PathTraversal, LimitExceeded>>,
149158
) -> FsResult<SharedVfsNode> {
150-
if path.is_empty() {
151-
return Err(FsError::trap(ErrorCode::Invalid));
152-
}
159+
let mut current = start;
153160

154-
let mut parts = path.split('/').peekable();
155-
let mut current = if parts.peek().expect("checked that not empty").is_empty() {
156-
parts.next();
157-
root
158-
} else {
159-
start
160-
};
161+
for direction in directions {
162+
let direction = direction?;
161163

162-
for part in parts {
163164
let current_guard = current.read().unwrap();
164165
let next = match &current_guard.kind {
165-
VfsNodeKind::Directory { children, .. } => match part {
166-
"" => {
167-
return Err(FsError::trap(ErrorCode::Invalid));
168-
}
169-
"." => Arc::clone(&current),
170-
".." => current_guard
166+
VfsNodeKind::Directory { children, .. } => match direction {
167+
PathTraversal::Stay => Arc::clone(&current),
168+
PathTraversal::Up => current_guard
171169
.parent
172170
.as_ref()
173171
.map(|parent| parent.upgrade().expect("parent still valid"))
174172
// note: `/..` = `/`, i.e. overshooting is allowed
175173
.unwrap_or_else(|| Arc::clone(&current)),
176-
_ => Arc::clone(
174+
PathTraversal::Down(segment) => Arc::clone(
177175
children
178-
.get(part)
176+
.get(&segment)
179177
.ok_or_else(|| FsError::trap(ErrorCode::NoEntry))?,
180178
),
181179
},
@@ -220,14 +218,14 @@ impl Allocation {
220218
}
221219

222220
/// Increase allocation by given amount.
223-
fn inc(&self, n: u64) -> Result<(), FailedAllocation> {
221+
fn inc(&self, n: u64) -> Result<(), LimitExceeded> {
224222
self.n
225223
.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |old| {
226224
let new = old.checked_add(n)?;
227225
(new <= self.limit).then_some(new)
228226
})
229227
.map(|_| ())
230-
.map_err(|current| FailedAllocation {
228+
.map_err(|current| LimitExceeded {
231229
name: self.name,
232230
limit: self.limit,
233231
current,
@@ -236,65 +234,6 @@ impl Allocation {
236234
}
237235
}
238236

239-
/// Failed allocation error.
240-
#[derive(Debug)]
241-
struct FailedAllocation {
242-
/// Name of the allocation type/resource.
243-
name: &'static str,
244-
245-
/// Allocation limit.
246-
limit: u64,
247-
248-
/// Current allocation size.
249-
current: u64,
250-
251-
/// Requested/additional allocation.
252-
requested: u64,
253-
}
254-
255-
impl std::fmt::Display for FailedAllocation {
256-
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
257-
let Self {
258-
name,
259-
limit,
260-
current,
261-
requested,
262-
} = self;
263-
264-
write!(
265-
f,
266-
"{name} limit reached: limit<={limit} current=={current} requested+={requested}"
267-
)
268-
}
269-
}
270-
271-
impl From<FailedAllocation> for std::io::Error {
272-
fn from(e: FailedAllocation) -> Self {
273-
Self::new(std::io::ErrorKind::QuotaExceeded, e.to_string())
274-
}
275-
}
276-
277-
/// Limits for virtual filesystems.
278-
#[derive(Debug, Clone)]
279-
#[expect(missing_copy_implementations, reason = "allow later extensions")]
280-
pub struct VfsLimits {
281-
/// Maximum number of inodes.
282-
pub inodes: u64,
283-
284-
/// Maximum number of bytes in size.
285-
pub bytes: u64,
286-
}
287-
288-
impl Default for VfsLimits {
289-
fn default() -> Self {
290-
Self {
291-
inodes: 10_000,
292-
// 100MB
293-
bytes: 100 * 1024 * 1024,
294-
}
295-
}
296-
}
297-
298237
/// Current virtual filesystem allocation.
299238
#[derive(Debug)]
300239
struct VfsAllocation {
@@ -324,13 +263,18 @@ pub(crate) struct VfsState {
324263
/// Hash key for metadata hashes.
325264
metadata_hash_key: [u8; 16],
326265

266+
/// Limits.
267+
limits: VfsLimits,
268+
327269
/// Current allocation.
328270
allocation: VfsAllocation,
329271
}
330272

331273
impl VfsState {
332274
/// Create a new empty VFS.
333-
pub(crate) fn new(limits: &VfsLimits) -> Self {
275+
pub(crate) fn new(limits: VfsLimits) -> Self {
276+
let allocation = VfsAllocation::new(&limits);
277+
334278
Self {
335279
root: Arc::new(RwLock::new(VfsNode {
336280
kind: VfsNodeKind::Directory {
@@ -339,7 +283,8 @@ impl VfsState {
339283
parent: None,
340284
})),
341285
metadata_hash_key: rand::rng().random(),
342-
allocation: VfsAllocation::new(limits),
286+
limits,
287+
allocation,
343288
}
344289
}
345290

@@ -380,10 +325,27 @@ impl VfsState {
380325
let path = entry.path()?;
381326
let path_str = path.to_string_lossy();
382327

383-
let (path_str, name) = path_str.rsplit_once("/").unwrap_or((".", &path_str));
384-
let node =
385-
VfsNode::resolve_path(Arc::clone(&self.root), Arc::clone(&self.root), path_str)
386-
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
328+
// NOTE: we ignore "is_root" here because TAR files are unpacked at root level, hence CWD == root
329+
let (_is_root, directions) = PathTraversal::parse(&path_str, &self.limits)?;
330+
let mut directions = directions.collect::<Vec<_>>();
331+
332+
// Path traversal happens on the VFS tree, NOT on the parsed path, so the last part MUST be a valid segment.
333+
// That also means that `/does_not_exist/../to_be_created` is NOT valid.
334+
let name = match directions
335+
.pop()
336+
.expect("PathTraversal ensures that the path is not empty")?
337+
{
338+
PathTraversal::Down(segment) => segment,
339+
other @ (PathTraversal::Stay | PathTraversal::Up) => {
340+
return Err(std::io::Error::new(
341+
std::io::ErrorKind::InvalidFilename,
342+
format!("TAR target MUST end in a valid filename, not {other}"),
343+
));
344+
}
345+
};
346+
347+
let node = VfsNode::traverse(Arc::clone(&self.root), directions.into_iter())
348+
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
387349

388350
let child = Arc::new(RwLock::new(VfsNode {
389351
kind,
@@ -404,7 +366,7 @@ impl VfsState {
404366
));
405367
}
406368
VfsNodeKind::Directory { children } => {
407-
children.insert(name.into(), child);
369+
children.insert(name, child);
408370
}
409371
}
410372
}
@@ -467,7 +429,15 @@ impl<'a> VfsCtxView<'a> {
467429
/// Get node at given path.
468430
fn node_at(&self, res: Resource<Descriptor>, path: &str) -> FsResult<SharedVfsNode> {
469431
let node = self.node(res)?;
470-
VfsNode::resolve_path(Arc::clone(&self.vfs_state.root), node, path)
432+
433+
let (is_root, directions) = PathTraversal::parse(path, &self.vfs_state.limits)?;
434+
435+
let start = if is_root {
436+
Arc::clone(&self.vfs_state.root)
437+
} else {
438+
node
439+
};
440+
VfsNode::traverse(start, directions)
471441
}
472442
}
473443

0 commit comments

Comments
 (0)