Skip to content

Commit 240112f

Browse files
authored
Only copy generated seeds to node-local corpus (#460)
1 parent 3c3ae36 commit 240112f

File tree

2 files changed

+84
-6
lines changed

2 files changed

+84
-6
lines changed

common/src/buttercup/common/corpus.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,21 @@ def __init__(self, wdir: str, name: str, copy_corpus_max_size: int | None = None
3737
def basename(self) -> str:
3838
return os.path.basename(self.path)
3939

40-
def copy_file(self, src_file: str) -> str:
40+
def copy_file(self, src_file: str, only_local: bool = False) -> str:
4141
with open(src_file, "rb") as f:
4242
nm = hash_file(f)
4343
dst = os.path.join(self.path, nm)
44-
dst_remote = os.path.join(self.remote_path, nm)
45-
os.makedirs(self.remote_path, exist_ok=True)
46-
# Make the file available both node-local and remote
44+
# Copy to local corpus
4745
shutil.copy(src_file, dst)
48-
shutil.copy(dst, dst_remote)
46+
if not only_local:
47+
dst_remote = os.path.join(self.remote_path, nm)
48+
os.makedirs(self.remote_path, exist_ok=True)
49+
# Copy to remote corpus
50+
shutil.copy(dst, dst_remote)
4951
return dst
5052

5153
def copy_corpus(self, src_dir: str) -> list[str]:
54+
"""Copy files from src_dir to local corpus only."""
5255
files = []
5356
for file in os.listdir(src_dir):
5457
file_path = os.path.join(src_dir, file)
@@ -60,7 +63,7 @@ def copy_corpus(self, src_dir: str) -> list[str]:
6063
self.copy_corpus_max_size,
6164
)
6265
continue
63-
files.append(self.copy_file(file_path))
66+
files.append(self.copy_file(file_path, only_local=True))
6467
return files
6568

6669
def local_corpus_size(self) -> int:

common/tests/test_corpus.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,3 +227,78 @@ def test_input_dir_copy_corpus_all_files_too_large(temp_dir, mock_node_local):
227227
# Should return empty list
228228
assert copied_files == []
229229
assert input_dir.local_corpus_count() == 0
230+
231+
232+
def test_copy_corpus_only_local(temp_dir):
233+
"""Test that copy_corpus copies only to node-local (not remote)."""
234+
remote_path = os.path.join(temp_dir, "remote")
235+
with patch("buttercup.common.node_local.remote_path", return_value=remote_path):
236+
input_dir = InputDir(temp_dir, "test_corpus")
237+
238+
src_dir = os.path.join(temp_dir, "src_corpus")
239+
os.makedirs(src_dir, exist_ok=True)
240+
241+
# Create a test file
242+
file_path = os.path.join(src_dir, "test_file")
243+
with open(file_path, "wb") as f:
244+
f.write(b"test content")
245+
246+
copied_files = input_dir.copy_corpus(src_dir)
247+
248+
# File should exist locally
249+
assert len(copied_files) == 1
250+
assert os.path.exists(copied_files[0])
251+
252+
# Remote file should not exist
253+
remote_file = os.path.join(remote_path, os.path.basename(copied_files[0]))
254+
assert not os.path.exists(remote_file)
255+
256+
257+
def test_copy_file_only_local(temp_dir):
258+
"""Test that copy_file with only_local=True skips remote copy."""
259+
remote_path = os.path.join(temp_dir, "remote")
260+
with patch("buttercup.common.node_local.remote_path", return_value=remote_path):
261+
input_dir = InputDir(temp_dir, "test_corpus")
262+
263+
src_dir = os.path.join(temp_dir, "src_corpus")
264+
os.makedirs(src_dir, exist_ok=True)
265+
266+
# Create a test file
267+
file_path = os.path.join(src_dir, "test_file")
268+
with open(file_path, "wb") as f:
269+
f.write(b"test content")
270+
271+
# Copy file with only_local=True
272+
dst = input_dir.copy_file(file_path, only_local=True)
273+
274+
# File should exist locally
275+
assert os.path.exists(dst)
276+
277+
# Remote file should not exist
278+
remote_file = os.path.join(remote_path, os.path.basename(dst))
279+
assert not os.path.exists(remote_file)
280+
281+
282+
def test_copy_file_with_remote(temp_dir):
283+
"""Test that copy_file with only_local=False copies to both local and remote."""
284+
remote_path = os.path.join(temp_dir, "remote")
285+
with patch("buttercup.common.node_local.remote_path", return_value=remote_path):
286+
input_dir = InputDir(temp_dir, "test_corpus")
287+
288+
src_dir = os.path.join(temp_dir, "src_corpus")
289+
os.makedirs(src_dir, exist_ok=True)
290+
291+
# Create a test file
292+
file_path = os.path.join(src_dir, "test_file")
293+
with open(file_path, "wb") as f:
294+
f.write(b"test content")
295+
296+
# Copy file with only_local=False (explicit)
297+
dst = input_dir.copy_file(file_path, only_local=False)
298+
299+
# File should exist locally
300+
assert os.path.exists(dst)
301+
302+
# Same file should exist in remote
303+
remote_file = os.path.join(remote_path, os.path.basename(dst))
304+
assert os.path.exists(remote_file)

0 commit comments

Comments (0)