From d5d81bc33ba38d9078e1bbd920f5483836764f4b Mon Sep 17 00:00:00 2001
From: Ronald Eytchison
Date: Mon, 26 Jan 2026 18:33:38 -0500
Subject: [PATCH] Only copy generated seeds to node-local corpus

---
NOTE(review): line breaks and indentation in this patch were restored from a
whitespace-collapsed copy. Line counts match the hunk headers, but the nesting
depth of statements that follow a `with` header could not be recovered exactly;
confirm against the upstream commit before applying.

 common/src/buttercup/common/corpus.py | 15 +++---
 common/tests/test_corpus.py           | 75 +++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/common/src/buttercup/common/corpus.py b/common/src/buttercup/common/corpus.py
index a9832905..16bd6d7f 100644
--- a/common/src/buttercup/common/corpus.py
+++ b/common/src/buttercup/common/corpus.py
@@ -37,18 +37,21 @@ def __init__(self, wdir: str, name: str, copy_corpus_max_size: int | None = None
     def basename(self) -> str:
         return os.path.basename(self.path)
 
-    def copy_file(self, src_file: str) -> str:
+    def copy_file(self, src_file: str, only_local: bool = False) -> str:
         with open(src_file, "rb") as f:
             nm = hash_file(f)
             dst = os.path.join(self.path, nm)
-            dst_remote = os.path.join(self.remote_path, nm)
-            os.makedirs(self.remote_path, exist_ok=True)
-            # Make the file available both node-local and remote
+            # Copy to local corpus
             shutil.copy(src_file, dst)
-            shutil.copy(dst, dst_remote)
+            if not only_local:
+                dst_remote = os.path.join(self.remote_path, nm)
+                os.makedirs(self.remote_path, exist_ok=True)
+                # Copy to remote corpus
+                shutil.copy(dst, dst_remote)
         return dst
 
     def copy_corpus(self, src_dir: str) -> list[str]:
+        """Copy files from src_dir to local corpus only."""
         files = []
         for file in os.listdir(src_dir):
             file_path = os.path.join(src_dir, file)
@@ -60,7 +63,7 @@ def copy_corpus(self, src_dir: str) -> list[str]:
                     self.copy_corpus_max_size,
                 )
                 continue
-            files.append(self.copy_file(file_path))
+            files.append(self.copy_file(file_path, only_local=True))
         return files
 
     def local_corpus_size(self) -> int:
diff --git a/common/tests/test_corpus.py b/common/tests/test_corpus.py
index 455296b2..a412b5ae 100644
--- a/common/tests/test_corpus.py
+++ b/common/tests/test_corpus.py
@@ -227,3 +227,78 @@ def test_input_dir_copy_corpus_all_files_too_large(temp_dir, mock_node_local):
     # Should return empty list
     assert copied_files == []
     assert input_dir.local_corpus_count() == 0
+
+
+def test_copy_corpus_only_local(temp_dir):
+    """Test that copy_corpus copies only to node-local (not remote)."""
+    remote_path = os.path.join(temp_dir, "remote")
+    with patch("buttercup.common.node_local.remote_path", return_value=remote_path):
+        input_dir = InputDir(temp_dir, "test_corpus")
+
+        src_dir = os.path.join(temp_dir, "src_corpus")
+        os.makedirs(src_dir, exist_ok=True)
+
+        # Create a test file
+        file_path = os.path.join(src_dir, "test_file")
+        with open(file_path, "wb") as f:
+            f.write(b"test content")
+
+        copied_files = input_dir.copy_corpus(src_dir)
+
+        # File should exist locally
+        assert len(copied_files) == 1
+        assert os.path.exists(copied_files[0])
+
+        # Remote file should not exist
+        remote_file = os.path.join(remote_path, os.path.basename(copied_files[0]))
+        assert not os.path.exists(remote_file)
+
+
+def test_copy_file_only_local(temp_dir):
+    """Test that copy_file with only_local=True skips remote copy."""
+    remote_path = os.path.join(temp_dir, "remote")
+    with patch("buttercup.common.node_local.remote_path", return_value=remote_path):
+        input_dir = InputDir(temp_dir, "test_corpus")
+
+        src_dir = os.path.join(temp_dir, "src_corpus")
+        os.makedirs(src_dir, exist_ok=True)
+
+        # Create a test file
+        file_path = os.path.join(src_dir, "test_file")
+        with open(file_path, "wb") as f:
+            f.write(b"test content")
+
+        # Copy file with only_local=True
+        dst = input_dir.copy_file(file_path, only_local=True)
+
+        # File should exist locally
+        assert os.path.exists(dst)
+
+        # Remote file should not exist
+        remote_file = os.path.join(remote_path, os.path.basename(dst))
+        assert not os.path.exists(remote_file)
+
+
+def test_copy_file_with_remote(temp_dir):
+    """Test that copy_file with only_local=False copies to both local and remote."""
+    remote_path = os.path.join(temp_dir, "remote")
+    with patch("buttercup.common.node_local.remote_path", return_value=remote_path):
+        input_dir = InputDir(temp_dir, "test_corpus")
+
+        src_dir = os.path.join(temp_dir, "src_corpus")
+        os.makedirs(src_dir, exist_ok=True)
+
+        # Create a test file
+        file_path = os.path.join(src_dir, "test_file")
+        with open(file_path, "wb") as f:
+            f.write(b"test content")
+
+        # Copy file with only_local=False (explicit)
+        dst = input_dir.copy_file(file_path, only_local=False)
+
+        # File should exist locally
+        assert os.path.exists(dst)
+
+        # Same file should exist in remote
+        remote_file = os.path.join(remote_path, os.path.basename(dst))
+        assert os.path.exists(remote_file)