Commit a6afc80

Merge pull request #5254 from garlick/issue#5245
Ignore errors when starting flux from a restart dump containing giant blobs
2 parents: 2bdc5e9 + e067dc4

File tree: 4 files changed, +49 -1 lines

doc/man1/flux-restore.rst

Lines changed: 2 additions & 0 deletions

@@ -53,6 +53,8 @@ OPTIONS
    Bypass the broker content cache and interact directly with the backing
    store. Performance will vary depending on the content of the archive.

+**--size-limit**\ =\ *SIZE*
+   Skip restoring keys that exceed SIZE bytes (default: no limit).

 RESOURCES
 =========
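
A usage sketch for the new option, combined with the existing --checkpoint flag (the archive name dump.tar is hypothetical): running flux restore --checkpoint --size-limit=104857600 dump.tar restores the checkpoint while skipping, rather than failing on, any key whose blob exceeds 100 MiB.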

etc/rc1

Lines changed: 1 addition & 1 deletion

@@ -47,7 +47,7 @@ if test $RANK -eq 0; then
     flux module load ${backingmod} truncate
 fi
 echo "restoring content from ${dumpfile}"
-flux restore --quiet --checkpoint ${dumpfile}
+flux restore --quiet --checkpoint --size-limit=104857600 ${dumpfile}
 if test -n "${dumplink}"; then
     rm -f ${dumplink}
 fi
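
For reference, 104857600 bytes is 100 x 1048576, i.e. 100 MiB. With this one-line change, a restart from a dump skips any blob above 100 MiB when restoring content instead of aborting startup, which is the failure mode the merge title describes.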

src/cmd/builtin/restore.c

Lines changed: 16 additions & 0 deletions

@@ -37,6 +37,7 @@ static int content_flags;
 static time_t restore_timestamp;
 static int blobcount;
 static int keycount;
+static int blob_size_limit;

 static void progress (int delta_blob, int delta_keys)
 {
@@ -270,6 +271,17 @@ static json_t *restore_snapshot (struct archive *ar, flux_t *h)
         else if (type == AE_IFREG) {
             int size = archive_entry_size (entry);

+            if (blob_size_limit > 0 && size > blob_size_limit) {
+                fprintf (stderr,
+                         "%s%s size %d exceeds %d limit, skipping\n",
+                         (!quiet && !verbose) ? "\r" : "",
+                         path,
+                         size,
+                         blob_size_limit);
+                // N.B. archive_read_next_header() skips unconsumed data
+                // automatically so it is safe to "continue" here.
+                continue;
+            }
             if (size > bufsize) {
                 void *newbuf;
                 if (!(newbuf = realloc (buf, size)))
@@ -354,6 +366,7 @@ static int cmd_restore (optparse_t *p, int ac, char *av[])
         content_flags |= CONTENT_FLAG_CACHE_BYPASS;
         kvs_checkpoint_flags |= KVS_CHECKPOINT_FLAG_CACHE_BYPASS;
     }
+    blob_size_limit = optparse_get_int (p, "size-limit", 0);

     h = builtin_get_flux_handle (p);
     ar = restore_create (infile);
@@ -458,6 +471,9 @@ static struct optparse_option restore_opts[] = {
     { .name = "no-cache", .has_arg = 0,
       .usage = "Bypass the broker content cache",
     },
+    { .name = "size-limit", .has_arg = 1, .arginfo = "SIZE",
+      .usage = "Do not restore blobs greater than SIZE bytes",
+    },
     OPTPARSE_TABLE_END
 };
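
The N.B. in the middle hunk relies on a libarchive behavior worth spelling out: a caller may leave an entry's data unconsumed, and the next archive_read_next_header() call discards it internally. Below is a minimal standalone sketch of that skip pattern, assuming only libarchive; the program name, limit, and messages are illustrative, not flux-core's code.

    /* skipscan.c - list archive entries, skipping any larger than LIMIT.
     * A hypothetical demo, not flux-core code.  Build (assumed):
     *   cc -o skipscan skipscan.c -larchive
     */
    #include <archive.h>
    #include <archive_entry.h>
    #include <stdio.h>

    #define LIMIT 104857600LL   /* 100 MiB, same value rc1 passes */

    int main (int argc, char **argv)
    {
        struct archive *ar;
        struct archive_entry *entry;

        if (argc != 2) {
            fprintf (stderr, "Usage: skipscan FILE.tar\n");
            return 1;
        }
        if (!(ar = archive_read_new ()))
            return 1;
        archive_read_support_format_all (ar);
        if (archive_read_open_filename (ar, argv[1], 10240) != ARCHIVE_OK) {
            fprintf (stderr, "%s\n", archive_error_string (ar));
            return 1;
        }
        while (archive_read_next_header (ar, &entry) == ARCHIVE_OK) {
            long long size = archive_entry_size (entry);
            if (size > LIMIT) {
                /* No archive_read_data() here: leaving the data unconsumed
                 * is safe because the next archive_read_next_header() call
                 * skips the remainder of this entry internally. */
                printf ("%s size %lld exceeds limit, skipping\n",
                        archive_entry_pathname (entry), size);
                continue;
            }
            printf ("%s (%lld bytes)\n", archive_entry_pathname (entry), size);
            /* ...a real consumer would call archive_read_data() here... */
        }
        archive_read_free (ar);
        return 0;
    }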

t/t2807-dump-cmd.t

Lines changed: 30 additions & 0 deletions

@@ -227,4 +227,34 @@ test_expect_success 'restart flux instance and try to run a job' '
 	flux run /bin/true
 '

+# Cover --size-limit
+
+test_expect_success 'create bigdump.tar with a 12M blob in it' '
+	mkdir -p big &&
+	dd if=/dev/zero of=big/tinyblob bs=1048576 count=1 &&
+	dd if=/dev/zero of=big/bigblob bs=1048576 count=12 &&
+	dd if=/dev/zero of=big/smallblob bs=1048576 count=3 &&
+	dd if=/dev/zero of=big/medblob bs=1048576 count=6 &&
+	dd if=/dev/zero of=big/med2blob bs=1048576 count=6 &&
+	tar cvf bigdump.tar big
+'
+test_expect_success 'restore bigdump.tar and verify blob count' '
+	flux start flux restore \
+		--key=foo bigdump.tar 2>bigdump.err &&
+	grep "restored 5 keys (7 blobs)" bigdump.err
+'
+test_expect_success 'restore bigdump.tar with size limit' '
+	flux start flux restore --size-limit=10485760 \
+		--key=foo bigdump.tar 2>bigdump2.err &&
+	grep "exceeds" bigdump2.err &&
+	grep "restored 4 keys (6 blobs)" bigdump2.err
+'
+test_expect_success 'rc1 skips blob that exceeds 100M limit' '
+	dd if=/dev/zero of=big/hugeblob bs=1048576 count=120 &&
+	tar cvf bigdump2.tar big &&
+	flux start -o,-Scontent.restore=bigdump2.tar \
+		/bin/true 2>bigdump3.err &&
+	grep "exceeds" bigdump3.err
+'
+
 test_done
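
On the expected counts: the unrestricted restore reports 5 keys (7 blobs) for the five dd'd files, and with --size-limit=10485760 (10 MiB) only the 12 MiB big/bigblob trips the limit, so the limited run comes up exactly one key and one blob short (4 keys, 6 blobs). The final test's 120 MiB hugeblob likewise exceeds the 100 MiB limit hard-coded in rc1, covering the restart path.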
