influxdata
diff --git a/‎guests/evil/src/lib.rs‎
Lines changed: 4 additions & 0 deletions b/‎guests/evil/src/lib.rs‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎guests/evil/src/root/mod.rs‎
Lines changed: 1 addition & 0 deletions b/‎guests/evil/src/root/mod.rs‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎guests/evil/src/root/path_long.rs‎
Lines changed: 19 additions & 0 deletions b/‎guests/evil/src/root/path_long.rs‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎host/src/lib.rs‎
Lines changed: 1 addition & 3 deletions b/‎host/src/lib.rs‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎host/src/vfs/error.rs‎
Lines changed: 51 additions & 0 deletions b/‎host/src/vfs/error.rs‎
Lines changed: 51 additions & 0 deletions
diff --git a/‎host/src/vfs/limits.rs‎
Lines changed: 37 additions & 0 deletions b/‎host/src/vfs/limits.rs‎
Lines changed: 37 additions & 0 deletions
diff --git a/‎host/src/vfs.rs‎ renamed to ‎host/src/vfs/mod.rs‎
Lines changed: 62 additions & 92 deletions b/‎host/src/vfs.rs‎ renamed to ‎host/src/vfs/mod.rs‎
Lines changed: 62 additions & 92 deletions
@@ -52,6 +52,10 @@ impl Evil {
                 root: Box::new(root::not_tar::root),
                 udfs: Box::new(common::udfs_empty),
             },
+            "root::path_long" => Self {
+                root: Box::new(root::path_long::root),
+                udfs: Box::new(common::udfs_empty),
+            },
             "root::unsupported_entry" => Self {
                 root: Box::new(root::unsupported_entry::root),
                 udfs: Box::new(common::udfs_empty),
 
@@ -2,4 +2,5 @@
 pub(crate) mod invalid_entry;
 pub(crate) mod many_files;
 pub(crate) mod not_tar;
+pub(crate) mod path_long;
 pub(crate) mod unsupported_entry;
@@ -0,0 +1,19 @@
+//! Evil payload that creates a file with a long path.
+
+/// Return the root file system: an in-memory TAR archive with one empty file whose path is `limit + 1` bytes long (`limit` is read from the `limit` environment variable).
+#[expect(clippy::unnecessary_wraps, reason = "public API through export! macro")]
+pub(crate) fn root() -> Option<Vec<u8>> {
+    let mut ar = tar::Builder::new(Vec::new());
+
+    let limit: usize = std::env::var("limit").unwrap().parse().unwrap(); // panics if `limit` is unset or not a valid `usize`
+
+    let mut header = tar::Header::new_gnu();
+    header
+        .set_path(std::iter::repeat_n('x', limit + 1).collect::<String>()) // one ASCII `x` more than `limit`
+        .unwrap(); // NOTE(review): `Header::set_path` rejects paths that do not fit the header name field -- confirm `limit` stays small enough, otherwise `Builder::append_data` is needed
+    header.set_size(0); // the entry carries no data
+    header.set_cksum();
+    ar.append(&header, b"".as_slice()).unwrap();
+
+    Some(ar.into_inner().unwrap())
+}
@@ -43,8 +43,6 @@ use crate::{
 #[cfg(test)]
 use datafusion_udf_wasm_bundle as _;
 #[cfg(test)]
-use insta as _;
-#[cfg(test)]
 use regex as _;
 #[cfg(test)]
 use wiremock as _;
@@ -356,7 +354,7 @@ impl WasmScalarUdf {
         let WasmComponentPrecompiled { engine, component } = component;
 
         // Create in-memory VFS
-        let vfs_state = VfsState::new(&permissions.vfs);
+        let vfs_state = VfsState::new(permissions.vfs.clone());
 
         // set up WASI p2 context
         let stderr = MemoryOutputPipe::new(1024);
 
@@ -0,0 +1,51 @@
+//! Error handling.
+
+use wasmtime_wasi::p2::FsError;
+
+/// Error raised when a request would exceed a configured resource limit.
+#[derive(Debug, Clone)]
+#[expect(missing_copy_implementations, reason = "allow later extensions")]
+pub struct LimitExceeded {
+    /// Name of the limited resource, used as the prefix of the error message.
+    pub(crate) name: &'static str,
+
+    /// Configured limit (inclusive upper bound, see the `limit<=` part of the message).
+    pub(crate) limit: u64,
+
+    /// Usage at the time the request was rejected.
+    pub(crate) current: u64,
+
+    /// Additional amount that was requested on top of `current`.
+    pub(crate) requested: u64,
+}
+
+impl std::fmt::Display for LimitExceeded { // one-line, human-readable summary of the exceeded limit
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Self { // exhaustive destructuring: a newly added field must be handled here or this fails to compile
+            name,
+            limit,
+            current,
+            requested,
+        } = self;
+
+        write!(
+            f,
+            "{name} limit reached: limit<={limit} current=={current} requested+={requested}"
+        )
+    }
+}
+
+impl std::error::Error for LimitExceeded {} // no overrides: only the trait's default methods are used
+
+impl From<LimitExceeded> for std::io::Error { // lets `?` turn a limit violation into an I/O error
+    fn from(e: LimitExceeded) -> Self {
+        Self::new(std::io::ErrorKind::QuotaExceeded, e.to_string()) // keeps the formatted message, classified as a quota error
+    }
+}
+
+impl From<LimitExceeded> for FsError { // lets `?` turn a limit violation into a WASI filesystem error
+    fn from(e: LimitExceeded) -> Self {
+        let e: std::io::Error = e.into(); // reuse the `std::io::Error` conversion so both paths classify the error the same way
+        e.into()
+    }
+}
@@ -0,0 +1,37 @@
+//! Limit configuration.
+
+/// Limits for virtual filesystems.
+///
+/// # Depth
+/// The depth of the file system is deliberately NOT limited on its own, since doing so is practically equivalent to
+/// limiting [the number of inodes](Self::inodes). Expensive path traversal is additionally bounded by
+/// [`max_path_length`](Self::max_path_length).
+#[derive(Debug, Clone)]
+#[expect(missing_copy_implementations, reason = "allow later extensions")]
+pub struct VfsLimits {
+    /// Maximum number of inodes.
+    pub inodes: u64,
+
+    /// Maximum size, in bytes.
+    pub bytes: u64,
+
+    /// Maximum length of a whole path, in bytes.
+    pub max_path_length: u64,
+
+    /// Maximum size of a single path segment, in bytes.
+    ///
+    /// Keep this rather small to prevent super-linear complexity due to string hashing.
+    pub max_path_segment_size: u64,
+}
+
+impl Default for VfsLimits {
+    fn default() -> Self {
+        Self {
+            inodes: 10_000,
+            // 100 MiB
+            bytes: 100 * 1024 * 1024,
+            max_path_length: 255,
+            max_path_segment_size: 50,
+        }
+    }
+}
@@ -37,6 +37,16 @@ use wasmtime_wasi::{
     },
 };
 
+pub use crate::vfs::limits::VfsLimits;
+use crate::vfs::{
+    error::LimitExceeded,
+    path::{PathSegment, PathTraversal},
+};
+
+pub mod error;
+mod limits;
+mod path;
+
 /// Shared version of [`VfsNode`].
 type SharedVfsNode = Arc<RwLock<VfsNode>>;
 
@@ -61,7 +71,7 @@ enum VfsNodeKind {
     /// A directory containing child nodes.
     Directory {
         /// Child nodes indexed by name.
-        children: HashMap<Box<str>, SharedVfsNode>,
+        children: HashMap<PathSegment, SharedVfsNode>,
     },
 }
 
@@ -142,40 +152,28 @@ impl VfsNode {
     }
 
     /// Resolve a path from a starting node to a target node.
-    fn resolve_path(
-        root: SharedVfsNode,
+    fn traverse(
         start: SharedVfsNode,
-        path: &str,
+        directions: impl Iterator<Item = Result<PathTraversal, LimitExceeded>>,
     ) -> FsResult<SharedVfsNode> {
-        if path.is_empty() {
-            return Err(FsError::trap(ErrorCode::Invalid));
-        }
+        let mut current = start;
 
-        let mut parts = path.split('/').peekable();
-        let mut current = if parts.peek().expect("checked that not empty").is_empty() {
-            parts.next();
-            root
-        } else {
-            start
-        };
+        for direction in directions {
+            let direction = direction?;
 
-        for part in parts {
             let current_guard = current.read().unwrap();
             let next = match &current_guard.kind {
-                VfsNodeKind::Directory { children, .. } => match part {
-                    "" => {
-                        return Err(FsError::trap(ErrorCode::Invalid));
-                    }
-                    "." => Arc::clone(&current),
-                    ".." => current_guard
+                VfsNodeKind::Directory { children, .. } => match direction {
+                    PathTraversal::Stay => Arc::clone(&current),
+                    PathTraversal::Up => current_guard
                         .parent
                         .as_ref()
                         .map(|parent| parent.upgrade().expect("parent still valid"))
                         // note: `/..` = `/`, i.e. overshooting is allowed
                         .unwrap_or_else(|| Arc::clone(&current)),
-                    _ => Arc::clone(
+                    PathTraversal::Down(segment) => Arc::clone(
                         children
-                            .get(part)
+                            .get(&segment)
                             .ok_or_else(|| FsError::trap(ErrorCode::NoEntry))?,
                     ),
                 },
@@ -220,14 +218,14 @@ impl Allocation {
     }
 
     /// Increase allocation by given amount.
-    fn inc(&self, n: u64) -> Result<(), FailedAllocation> {
+    fn inc(&self, n: u64) -> Result<(), LimitExceeded> {
         self.n
             .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |old| {
                 let new = old.checked_add(n)?;
                 (new <= self.limit).then_some(new)
             })
             .map(|_| ())
-            .map_err(|current| FailedAllocation {
+            .map_err(|current| LimitExceeded {
                 name: self.name,
                 limit: self.limit,
                 current,
@@ -236,65 +234,6 @@ impl Allocation {
     }
 }
 
-/// Failed allocation error.
-#[derive(Debug)]
-struct FailedAllocation {
-    /// Name of the allocation type/resource.
-    name: &'static str,
-
-    /// Allocation limit.
-    limit: u64,
-
-    /// Current allocation size.
-    current: u64,
-
-    /// Requested/additional allocation.
-    requested: u64,
-}
-
-impl std::fmt::Display for FailedAllocation {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let Self {
-            name,
-            limit,
-            current,
-            requested,
-        } = self;
-
-        write!(
-            f,
-            "{name} limit reached: limit<={limit} current=={current} requested+={requested}"
-        )
-    }
-}
-
-impl From<FailedAllocation> for std::io::Error {
-    fn from(e: FailedAllocation) -> Self {
-        Self::new(std::io::ErrorKind::QuotaExceeded, e.to_string())
-    }
-}
-
-/// Limits for virtual filesystems.
-#[derive(Debug, Clone)]
-#[expect(missing_copy_implementations, reason = "allow later extensions")]
-pub struct VfsLimits {
-    /// Maximum number of inodes.
-    pub inodes: u64,
-
-    /// Maximum number of bytes in size.
-    pub bytes: u64,
-}
-
-impl Default for VfsLimits {
-    fn default() -> Self {
-        Self {
-            inodes: 10_000,
-            // 100MB
-            bytes: 100 * 1024 * 1024,
-        }
-    }
-}
-
 /// Current virtual filesystem allocation.
 #[derive(Debug)]
 struct VfsAllocation {
@@ -324,13 +263,18 @@ pub(crate) struct VfsState {
     /// Hash key for metadata hashes.
     metadata_hash_key: [u8; 16],
 
+    /// Limits.
+    limits: VfsLimits,
+
     /// Current allocation.
     allocation: VfsAllocation,
 }
 
 impl VfsState {
     /// Create a new empty VFS.
-    pub(crate) fn new(limits: &VfsLimits) -> Self {
+    pub(crate) fn new(limits: VfsLimits) -> Self {
+        let allocation = VfsAllocation::new(&limits);
+
         Self {
             root: Arc::new(RwLock::new(VfsNode {
                 kind: VfsNodeKind::Directory {
@@ -339,7 +283,8 @@ impl VfsState {
                 parent: None,
             })),
             metadata_hash_key: rand::rng().random(),
-            allocation: VfsAllocation::new(limits),
+            limits,
+            allocation,
         }
     }
 
@@ -380,10 +325,27 @@ impl VfsState {
             let path = entry.path()?;
             let path_str = path.to_string_lossy();
 
-            let (path_str, name) = path_str.rsplit_once("/").unwrap_or((".", &path_str));
-            let node =
-                VfsNode::resolve_path(Arc::clone(&self.root), Arc::clone(&self.root), path_str)
-                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
+            // NOTE: we ignore "is_root" here because TAR files are unpacked at root level, hence CWD == root
+            let (_is_root, directions) = PathTraversal::parse(&path_str, &self.limits)?;
+            let mut directions = directions.collect::<Vec<_>>();
+
+            // Path traversal happens on the VFS tree, NOT on the parsed path, so the last part MUST be a valid segment.
+            // That also means that `/does_not_exist/../to_be_created` is NOT valid.
+            let name = match directions
+                .pop()
+                .expect("PathTraversal ensures that the path is not empty")?
+            {
+                PathTraversal::Down(segment) => segment,
+                other @ (PathTraversal::Stay | PathTraversal::Up) => {
+                    return Err(std::io::Error::new(
+                        std::io::ErrorKind::InvalidFilename,
+                        format!("TAR target MUST end in a valid filename, not {other}"),
+                    ));
+                }
+            };
+
+            let node = VfsNode::traverse(Arc::clone(&self.root), directions.into_iter())
+                .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
 
             let child = Arc::new(RwLock::new(VfsNode {
                 kind,
@@ -404,7 +366,7 @@ impl VfsState {
                     ));
                 }
                 VfsNodeKind::Directory { children } => {
-                    children.insert(name.into(), child);
+                    children.insert(name, child);
                 }
             }
         }
@@ -467,7 +429,15 @@ impl<'a> VfsCtxView<'a> {
     /// Get node at given path.
     fn node_at(&self, res: Resource<Descriptor>, path: &str) -> FsResult<SharedVfsNode> {
         let node = self.node(res)?;
-        VfsNode::resolve_path(Arc::clone(&self.vfs_state.root), node, path)
+
+        let (is_root, directions) = PathTraversal::parse(path, &self.vfs_state.limits)?; // parsed against the VFS limits; a parse error is returned to the caller
+
+        let start = if is_root { // `is_root` paths are walked from the VFS root, all others from the descriptor's node
+            Arc::clone(&self.vfs_state.root)
+        } else {
+            node
+        };
+        VfsNode::traverse(start, directions)
     }
 }