Skip to content

Commit 09f2980

Browse files
authored
Add optional tmpfs storage for fuzzing corpus (#461)
* Add optional tmpfs storage for fuzzing corpus

Add support for storing the node-local fuzzing corpus in tmpfs (a RAM-based filesystem) for improved I/O performance during fuzzing operations.

Key changes:
- Add the CORPUS_TMPFS_PATH environment variable to configure the tmpfs location
- Add cross-filesystem-safe operations (copy+delete fallback for EXDEV)
- Update the Corpus class to use the tmpfs path when enabled, while maintaining the correct remote path calculation for rsync synchronization
- Add Helm chart configuration using /dev/shm (requires no host setup)
- Update the fuzzer-bot, coverage-bot, merger-bot, and seed-gen deployments

The feature is disabled by default. Enable it by setting global.volumes.corpusTmpfs.enabled=true in values.yaml. It uses a hostPath to /dev/shm/buttercup-corpus, which is available on Linux systems without requiring any Kubernetes host configuration.
1 parent 22cebd4 commit 09f2980

File tree

12 files changed

+667
-10
lines changed

12 files changed

+667
-10
lines changed

common/src/buttercup/common/corpus.py

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,37 @@
1818
# TODO: this file is one of the few files that uses os.path.join. Switch to Path.
1919

2020

21+
def _get_corpus_storage_path(wdir: str, corpus_subpath: str) -> tuple[str, Path]:
    """Work out where a corpus is stored locally and where it syncs remotely.

    The remote path is always derived from the location the corpus would have
    under ``wdir``; only the local path moves when a tmpfs path is configured.

    Args:
        wdir: The working directory (typically NODE_DATA_DIR)
        corpus_subpath: The corpus subdirectory (e.g., "{task_id}/buttercup_corpus_{harness}")

    Returns:
        Tuple of (local_path, remote_path)
    """
    tmpfs_root = node_local.get_corpus_tmpfs_path()
    # Location of the corpus as if it were stored in wdir; the remote path is
    # computed from this in both modes.
    wdir_path = os.path.join(wdir, corpus_subpath)

    if not tmpfs_root:
        # Standard behavior - the corpus lives in wdir itself
        return wdir_path, node_local.remote_path(Path(wdir_path))

    # tmpfs enabled - keep the files on tmpfs, keep the remote path unchanged
    local_path = os.path.join(str(tmpfs_root), corpus_subpath)
    remote_path = node_local.remote_path(Path(wdir_path))
    logger.debug(f"Using tmpfs for corpus: local={local_path}, remote={remote_path}")
    return local_path, remote_path
50+
51+
2152
def hash_file(fl: BinaryIO) -> str:
2253
h = hashlib.new("sha256")
2354
bts = fl.read(100)
@@ -28,9 +59,34 @@ def hash_file(fl: BinaryIO) -> str:
2859

2960

3061
class InputDir:
31-
def __init__(self, wdir: str, name: str, copy_corpus_max_size: int | None = None):
32-
self.path = os.path.join(wdir, name)
33-
self.remote_path = node_local.remote_path(Path(self.path))
62+
def __init__(
63+
self,
64+
wdir: str,
65+
name: str,
66+
copy_corpus_max_size: int | None = None,
67+
*,
68+
override_local_path: str | None = None,
69+
override_remote_path: Path | None = None,
70+
):
71+
"""Initialize an InputDir for corpus/crash storage.
72+
73+
Args:
74+
wdir: Working directory (typically NODE_DATA_DIR)
75+
name: Subdirectory name within wdir
76+
copy_corpus_max_size: Maximum size for copied corpus files
77+
override_local_path: If provided, use this as the local path instead of wdir/name
78+
override_remote_path: If provided, use this as the remote path
79+
"""
80+
if override_local_path is not None:
81+
self.path = override_local_path
82+
else:
83+
self.path = os.path.join(wdir, name)
84+
85+
if override_remote_path is not None:
86+
self.remote_path = override_remote_path
87+
else:
88+
self.remote_path = node_local.remote_path(Path(self.path))
89+
3490
self.copy_corpus_max_size = copy_corpus_max_size
3591
os.makedirs(self.path, exist_ok=True)
3692

@@ -110,9 +166,12 @@ def hash_corpus(cls, path: str) -> list[str]:
110166
continue
111167
file_path = os.path.join(path, file)
112168
try:
169+
assert os.path.isfile(file_path)
113170
with open(file_path, "rb") as f:
114171
hash_filename = hash_file(f)
115-
os.rename(file_path, os.path.join(path, hash_filename))
172+
dst_path = os.path.join(path, hash_filename)
173+
# Use shutil.move which handles cross-filesystem operations
174+
shutil.move(file_path, dst_path)
116175
hashed_files.append(hash_filename)
117176
except Exception as e:
118177
# Likely already hashed by another pod
@@ -224,7 +283,17 @@ def __init__(self, wdir: str, task_id: str, harness_name: str, copy_corpus_max_s
224283
self.task_id = task_id
225284
self.harness_name = harness_name
226285
self.corpus_dir = os.path.join(task_id, f"{CORPUS_DIR_NAME}_{harness_name}")
227-
super().__init__(wdir, self.corpus_dir, copy_corpus_max_size=copy_corpus_max_size)
286+
287+
# Get the storage paths, potentially using tmpfs for the local path
288+
local_path, remote_path = _get_corpus_storage_path(wdir, self.corpus_dir)
289+
290+
super().__init__(
291+
wdir,
292+
self.corpus_dir,
293+
copy_corpus_max_size=copy_corpus_max_size,
294+
override_local_path=local_path,
295+
override_remote_path=remote_path,
296+
)
228297

229298
def remove_any_merged(self, redis: Redis) -> None:
230299
merged_corpus_set = MergedCorpusSet(redis, self.task_id, self.harness_name)

common/src/buttercup/common/node_local.py

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Store the node local path for subsequent use
2+
import errno
23
import logging
34
import os
45
import shutil
@@ -14,6 +15,10 @@
1415

1516
node_local_path = os.getenv("NODE_DATA_DIR")
1617

18+
# Optional tmpfs path for corpus storage
19+
# When set, the fuzzing corpus will be stored on this tmpfs mount for improved performance
20+
corpus_tmpfs_path = os.getenv("CORPUS_TMPFS_PATH")
21+
1722

1823
NodeLocalPath: TypeAlias = Path
1924
RemotePath: TypeAlias = Path
@@ -24,6 +29,18 @@ def _get_root_path() -> NodeLocalPath:
2429
return NodeLocalPath(node_local_path)
2530

2631

32+
def get_corpus_tmpfs_path() -> Path | None:
33+
"""Return the tmpfs path for corpus storage if configured, otherwise None."""
34+
if corpus_tmpfs_path:
35+
return Path(corpus_tmpfs_path)
36+
return None
37+
38+
39+
def is_corpus_tmpfs_enabled() -> bool:
    """Check if tmpfs corpus storage is enabled.

    An unset and an empty ``corpus_tmpfs_path`` both count as disabled. This
    uses the same truthiness test as get_corpus_tmpfs_path(), so the two
    helpers can never disagree (an empty CORPUS_TMPFS_PATH previously reported
    "enabled" here while get_corpus_tmpfs_path() returned None).
    """
    return bool(corpus_tmpfs_path)
42+
43+
2744
class TmpDir:
2845
def __init__(self, path: Path):
2946
self.path = path
@@ -48,7 +65,10 @@ def temp_dir(root_path: Path) -> Iterator[TmpDir]:
4865

4966

5067
def rename_atomically(src: Path, dst: Path) -> Path | None:
51-
"""Rename a file atomically"""
68+
"""Rename a file atomically.
69+
70+
Falls back to copy+delete for cross-filesystem operations.
71+
"""
5272
src = Path(src)
5373
dst = Path(dst)
5474
dst.parent.mkdir(parents=True, exist_ok=True)
@@ -57,13 +77,50 @@ def rename_atomically(src: Path, dst: Path) -> Path | None:
5777
except OSError as e:
5878
# If the path already exists, it means another pod already downloaded it
5979
# we can just ignore this error and return None to signify that the path already exists
60-
if e.errno == 39:
80+
if e.errno == errno.ENOTEMPTY: # 39 = Directory not empty
6181
logger.debug(f"Local path {dst} already exists for {src}")
6282
return None
83+
# Handle cross-filesystem rename (errno 18 = EXDEV = Invalid cross-device link)
84+
if e.errno == errno.EXDEV:
85+
logger.debug(f"Cross-filesystem rename from {src} to {dst}, using copy+delete")
86+
return _copy_and_delete(src, dst)
6387
raise e
6488
return dst
6589

6690

91+
def _copy_and_delete(src: Path, dst: Path) -> Path | None:
92+
"""Copy a file/directory and delete the source. Used for cross-filesystem operations.
93+
94+
Note: Unlike os.rename, this intentionally returns None if dst already exists
95+
rather than overwriting. This supports the "first pod wins" pattern used by
96+
callers in distributed scenarios where concurrent pods may race to create the same file.
97+
"""
98+
try:
99+
if src.is_dir():
100+
# For directories, use copytree
101+
if dst.exists():
102+
logger.debug(f"Destination {dst} already exists, skipping copy")
103+
shutil.rmtree(src, ignore_errors=True)
104+
return None
105+
shutil.copytree(src, dst)
106+
shutil.rmtree(src, ignore_errors=True)
107+
else:
108+
# For files, use copy2 to preserve metadata
109+
if dst.exists():
110+
logger.debug(f"Destination {dst} already exists, skipping copy")
111+
try:
112+
os.unlink(src)
113+
except OSError:
114+
pass
115+
return None
116+
shutil.copy2(src, dst)
117+
os.unlink(src)
118+
return dst
119+
except Exception as e:
120+
logger.error(f"Failed to copy {src} to {dst}: {e}")
121+
raise
122+
123+
67124
def remote_path(local_path: NodeLocalPath) -> RemotePath:
68125
"""Convert the node local path to a remote path"""
69126
local_path = Path(local_path)

0 commit comments

Comments
 (0)