Skip to content

Commit 69b7379

Browse files
committed
Merge branch 'ps/reftable-stack-compaction'
The code paths to compact multiple reftable files have been updated to correctly deal with multiple compactions triggering at the same time.

* ps/reftable-stack-compaction:
  - reftable/stack: handle locked tables during auto-compaction
  - reftable/stack: fix corruption on concurrent compaction
  - reftable/stack: use lock_file when adding table to "tables.list"
  - reftable/stack: do not die when fsyncing lock file files
  - reftable/stack: simplify tracking of table locks
  - reftable/stack: update stats on failed full compaction
  - reftable/stack: test compaction with already-locked tables
  - reftable/stack: extract function to setup stack with N tables
  - reftable/stack: refactor function to gather table sizes
2 parents 2b9b229 + f234df0 commit 69b7379

File tree

3 files changed

+310
-84
lines changed

3 files changed

+310
-84
lines changed

reftable/stack.c

Lines changed: 186 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ static void format_name(struct strbuf *dest, uint64_t min, uint64_t max)
567567
}
568568

569569
struct reftable_addition {
570-
struct tempfile *lock_file;
570+
struct lock_file tables_list_lock;
571571
struct reftable_stack *stack;
572572

573573
char **new_tables;
@@ -581,13 +581,13 @@ static int reftable_stack_init_addition(struct reftable_addition *add,
581581
struct reftable_stack *st)
582582
{
583583
struct strbuf lock_file_name = STRBUF_INIT;
584-
int err = 0;
585-
add->stack = st;
584+
int err;
586585

587-
strbuf_addf(&lock_file_name, "%s.lock", st->list_file);
586+
add->stack = st;
588587

589-
add->lock_file = create_tempfile(lock_file_name.buf);
590-
if (!add->lock_file) {
588+
err = hold_lock_file_for_update(&add->tables_list_lock, st->list_file,
589+
LOCK_NO_DEREF);
590+
if (err < 0) {
591591
if (errno == EEXIST) {
592592
err = REFTABLE_LOCK_ERROR;
593593
} else {
@@ -596,7 +596,8 @@ static int reftable_stack_init_addition(struct reftable_addition *add,
596596
goto done;
597597
}
598598
if (st->opts.default_permissions) {
599-
if (chmod(add->lock_file->filename.buf, st->opts.default_permissions) < 0) {
599+
if (chmod(get_lock_file_path(&add->tables_list_lock),
600+
st->opts.default_permissions) < 0) {
600601
err = REFTABLE_IO_ERROR;
601602
goto done;
602603
}
@@ -635,7 +636,7 @@ static void reftable_addition_close(struct reftable_addition *add)
635636
add->new_tables_len = 0;
636637
add->new_tables_cap = 0;
637638

638-
delete_tempfile(&add->lock_file);
639+
rollback_lock_file(&add->tables_list_lock);
639640
strbuf_release(&nm);
640641
}
641642

@@ -651,7 +652,7 @@ void reftable_addition_destroy(struct reftable_addition *add)
651652
int reftable_addition_commit(struct reftable_addition *add)
652653
{
653654
struct strbuf table_list = STRBUF_INIT;
654-
int lock_file_fd = get_tempfile_fd(add->lock_file);
655+
int lock_file_fd = get_lock_file_fd(&add->tables_list_lock);
655656
int err = 0;
656657
size_t i;
657658

@@ -674,10 +675,13 @@ int reftable_addition_commit(struct reftable_addition *add)
674675
goto done;
675676
}
676677

677-
fsync_component_or_die(FSYNC_COMPONENT_REFERENCE, lock_file_fd,
678-
get_tempfile_path(add->lock_file));
678+
err = fsync_component(FSYNC_COMPONENT_REFERENCE, lock_file_fd);
679+
if (err < 0) {
680+
err = REFTABLE_IO_ERROR;
681+
goto done;
682+
}
679683

680-
err = rename_tempfile(&add->lock_file, add->stack->list_file);
684+
err = commit_lock_file(&add->tables_list_lock);
681685
if (err < 0) {
682686
err = REFTABLE_IO_ERROR;
683687
goto done;
@@ -995,6 +999,15 @@ static int stack_write_compact(struct reftable_stack *st,
995999
return err;
9961000
}
9971001

1002+
enum stack_compact_range_flags {
1003+
/*
1004+
* Perform a best-effort compaction. That is, even if we cannot lock
1005+
* all tables in the specified range, we will try to compact the
1006+
* remaining slice.
1007+
*/
1008+
STACK_COMPACT_RANGE_BEST_EFFORT = (1 << 0),
1009+
};
1010+
9981011
/*
9991012
* Compact all tables in the range `[first, last)` into a single new table.
10001013
*
@@ -1006,7 +1019,8 @@ static int stack_write_compact(struct reftable_stack *st,
10061019
*/
10071020
static int stack_compact_range(struct reftable_stack *st,
10081021
size_t first, size_t last,
1009-
struct reftable_log_expiry_config *expiry)
1022+
struct reftable_log_expiry_config *expiry,
1023+
unsigned int flags)
10101024
{
10111025
struct strbuf tables_list_buf = STRBUF_INIT;
10121026
struct strbuf new_table_name = STRBUF_INIT;
@@ -1016,7 +1030,9 @@ static int stack_compact_range(struct reftable_stack *st,
10161030
struct lock_file *table_locks = NULL;
10171031
struct tempfile *new_table = NULL;
10181032
int is_empty_table = 0, err = 0;
1019-
size_t i;
1033+
size_t first_to_replace, last_to_replace;
1034+
size_t i, nlocks = 0;
1035+
char **names = NULL;
10201036

10211037
if (first > last || (!expiry && first == last)) {
10221038
err = 0;
@@ -1046,27 +1062,55 @@ static int stack_compact_range(struct reftable_stack *st,
10461062
/*
10471063
* Lock all tables in the user-provided range. This is the slice of our
10481064
* stack which we'll compact.
1065+
*
1066+
* Note that we lock tables in reverse order from last to first. The
1067+
* intent behind this is to allow a newer process to perform best
1068+
* effort compaction of tables that it has added in the case where an
1069+
* older process is still busy compacting tables which are preexisting
1070+
* from the point of view of the newer process.
10491071
*/
10501072
REFTABLE_CALLOC_ARRAY(table_locks, last - first + 1);
1051-
for (i = first; i <= last; i++) {
1052-
stack_filename(&table_name, st, reader_name(st->readers[i]));
1073+
for (i = last + 1; i > first; i--) {
1074+
stack_filename(&table_name, st, reader_name(st->readers[i - 1]));
10531075

1054-
err = hold_lock_file_for_update(&table_locks[i - first],
1076+
err = hold_lock_file_for_update(&table_locks[nlocks],
10551077
table_name.buf, LOCK_NO_DEREF);
10561078
if (err < 0) {
1057-
if (errno == EEXIST)
1079+
/*
1080+
* When the table is locked already we may do a
1081+
* best-effort compaction and compact only the tables
1082+
* that we have managed to lock so far. This of course
1083+
* requires that we have been able to lock at least two
1084+
* tables, otherwise there would be nothing to compact.
1085+
* In that case, we return a lock error to our caller.
1086+
*/
1087+
if (errno == EEXIST && last - (i - 1) >= 2 &&
1088+
flags & STACK_COMPACT_RANGE_BEST_EFFORT) {
1089+
err = 0;
1090+
/*
1091+
* The subtraction is to offset the index, the
1092+
* addition is to only compact up to the table
1093+
* of the preceding iteration. They obviously
1094+
* cancel each other out, but that may be
1095+
* non-obvious when it was omitted.
1096+
*/
1097+
first = (i - 1) + 1;
1098+
break;
1099+
} else if (errno == EEXIST) {
10581100
err = REFTABLE_LOCK_ERROR;
1059-
else
1101+
goto done;
1102+
} else {
10601103
err = REFTABLE_IO_ERROR;
1061-
goto done;
1104+
goto done;
1105+
}
10621106
}
10631107

10641108
/*
10651109
* We need to close the lockfiles as we might otherwise easily
10661110
* run into file descriptor exhaustion when we compress a lot
10671111
* of tables.
10681112
*/
1069-
err = close_lock_file_gently(&table_locks[i - first]);
1113+
err = close_lock_file_gently(&table_locks[nlocks++]);
10701114
if (err < 0) {
10711115
err = REFTABLE_IO_ERROR;
10721116
goto done;
@@ -1119,6 +1163,100 @@ static int stack_compact_range(struct reftable_stack *st,
11191163
}
11201164
}
11211165

1166+
/*
1167+
* As we have unlocked the stack while compacting our slice of tables
1168+
* it may have happened that a concurrently running process has updated
1169+
* the stack while we were compacting. In that case, we need to check
1170+
* whether the tables that we have just compacted still exist in the
1171+
* stack in the exact same order as we have compacted them.
1172+
*
1173+
* If they do exist, then it is fine to continue and replace those
1174+
* tables with our compacted version. If they don't, then we need to
1175+
* abort.
1176+
*/
1177+
err = stack_uptodate(st);
1178+
if (err < 0)
1179+
goto done;
1180+
if (err > 0) {
1181+
ssize_t new_offset = -1;
1182+
int fd;
1183+
1184+
fd = open(st->list_file, O_RDONLY);
1185+
if (fd < 0) {
1186+
err = REFTABLE_IO_ERROR;
1187+
goto done;
1188+
}
1189+
1190+
err = fd_read_lines(fd, &names);
1191+
close(fd);
1192+
if (err < 0)
1193+
goto done;
1194+
1195+
/*
1196+
* Search for the offset of the first table that we have
1197+
* compacted in the updated "tables.list" file.
1198+
*/
1199+
for (size_t i = 0; names[i]; i++) {
1200+
if (strcmp(names[i], st->readers[first]->name))
1201+
continue;
1202+
1203+
/*
1204+
* We have found the first entry. Verify that all the
1205+
* subsequent tables we have compacted still exist in
1206+
* the modified stack in the exact same order as we
1207+
* have compacted them.
1208+
*/
1209+
for (size_t j = 1; j < last - first + 1; j++) {
1210+
const char *old = first + j < st->merged->stack_len ?
1211+
st->readers[first + j]->name : NULL;
1212+
const char *new = names[i + j];
1213+
1214+
/*
1215+
* If some entries are missing or in case the tables
1216+
* have changed then we need to bail out. Again, this
1217+
* shouldn't ever happen because we have locked the
1218+
* tables we are compacting.
1219+
*/
1220+
if (!old || !new || strcmp(old, new)) {
1221+
err = REFTABLE_OUTDATED_ERROR;
1222+
goto done;
1223+
}
1224+
}
1225+
1226+
new_offset = i;
1227+
break;
1228+
}
1229+
1230+
/*
1231+
* In case we didn't find our compacted tables in the stack we
1232+
* need to bail out. In theory, this should have never happened
1233+
* because we locked the tables we are compacting.
1234+
*/
1235+
if (new_offset < 0) {
1236+
err = REFTABLE_OUTDATED_ERROR;
1237+
goto done;
1238+
}
1239+
1240+
/*
1241+
* We have found the new range that we want to replace, so
1242+
* let's update the range of tables that we want to replace.
1243+
*/
1244+
first_to_replace = new_offset;
1245+
last_to_replace = last + (new_offset - first);
1246+
} else {
1247+
/*
1248+
* `fd_read_lines()` uses a `NULL` sentinel to indicate that
1249+
* the array is at its end. As we use `free_names()` to free
1250+
* the array, we need to include this sentinel value here and
1251+
* thus have to allocate `stack_len + 1` many entries.
1252+
*/
1253+
REFTABLE_CALLOC_ARRAY(names, st->merged->stack_len + 1);
1254+
for (size_t i = 0; i < st->merged->stack_len; i++)
1255+
names[i] = xstrdup(st->readers[i]->name);
1256+
first_to_replace = first;
1257+
last_to_replace = last;
1258+
}
1259+
11221260
/*
11231261
* If the resulting compacted table is not empty, then we need to move
11241262
* it into place now.
@@ -1141,12 +1279,12 @@ static int stack_compact_range(struct reftable_stack *st,
11411279
* have just written. In case the compacted table became empty we
11421280
* simply skip writing it.
11431281
*/
1144-
for (i = 0; i < first; i++)
1145-
strbuf_addf(&tables_list_buf, "%s\n", st->readers[i]->name);
1282+
for (i = 0; i < first_to_replace; i++)
1283+
strbuf_addf(&tables_list_buf, "%s\n", names[i]);
11461284
if (!is_empty_table)
11471285
strbuf_addf(&tables_list_buf, "%s\n", new_table_name.buf);
1148-
for (i = last + 1; i < st->merged->stack_len; i++)
1149-
strbuf_addf(&tables_list_buf, "%s\n", st->readers[i]->name);
1286+
for (i = last_to_replace + 1; names[i]; i++)
1287+
strbuf_addf(&tables_list_buf, "%s\n", names[i]);
11501288

11511289
err = write_in_full(get_lock_file_fd(&tables_list_lock),
11521290
tables_list_buf.buf, tables_list_buf.len);
@@ -1183,45 +1321,47 @@ static int stack_compact_range(struct reftable_stack *st,
11831321
* Delete the old tables. They may still be in use by concurrent
11841322
* readers, so it is expected that unlinking tables may fail.
11851323
*/
1186-
for (i = first; i <= last; i++) {
1187-
struct lock_file *table_lock = &table_locks[i - first];
1324+
for (i = 0; i < nlocks; i++) {
1325+
struct lock_file *table_lock = &table_locks[i];
11881326
char *table_path = get_locked_file_path(table_lock);
11891327
unlink(table_path);
11901328
free(table_path);
11911329
}
11921330

11931331
done:
11941332
rollback_lock_file(&tables_list_lock);
1195-
for (i = first; table_locks && i <= last; i++)
1196-
rollback_lock_file(&table_locks[i - first]);
1333+
for (i = 0; table_locks && i < nlocks; i++)
1334+
rollback_lock_file(&table_locks[i]);
11971335
reftable_free(table_locks);
11981336

11991337
delete_tempfile(&new_table);
12001338
strbuf_release(&new_table_name);
12011339
strbuf_release(&new_table_path);
1202-
12031340
strbuf_release(&tables_list_buf);
12041341
strbuf_release(&table_name);
1205-
return err;
1206-
}
1342+
free_names(names);
12071343

1208-
int reftable_stack_compact_all(struct reftable_stack *st,
1209-
struct reftable_log_expiry_config *config)
1210-
{
1211-
return stack_compact_range(st, 0, st->merged->stack_len ?
1212-
st->merged->stack_len - 1 : 0, config);
1344+
return err;
12131345
}
12141346

12151347
static int stack_compact_range_stats(struct reftable_stack *st,
12161348
size_t first, size_t last,
1217-
struct reftable_log_expiry_config *config)
1349+
struct reftable_log_expiry_config *config,
1350+
unsigned int flags)
12181351
{
1219-
int err = stack_compact_range(st, first, last, config);
1352+
int err = stack_compact_range(st, first, last, config, flags);
12201353
if (err == REFTABLE_LOCK_ERROR)
12211354
st->stats.failures++;
12221355
return err;
12231356
}
12241357

1358+
int reftable_stack_compact_all(struct reftable_stack *st,
1359+
struct reftable_log_expiry_config *config)
1360+
{
1361+
size_t last = st->merged->stack_len ? st->merged->stack_len - 1 : 0;
1362+
return stack_compact_range_stats(st, 0, last, config, 0);
1363+
}
1364+
12251365
static int segment_size(struct segment *s)
12261366
{
12271367
return s->end - s->start;
@@ -1305,14 +1445,15 @@ struct segment suggest_compaction_segment(uint64_t *sizes, size_t n,
13051445

13061446
static uint64_t *stack_table_sizes_for_compaction(struct reftable_stack *st)
13071447
{
1308-
uint64_t *sizes =
1309-
reftable_calloc(st->merged->stack_len, sizeof(*sizes));
13101448
int version = (st->opts.hash_id == GIT_SHA1_FORMAT_ID) ? 1 : 2;
13111449
int overhead = header_size(version) - 1;
1312-
int i = 0;
1313-
for (i = 0; i < st->merged->stack_len; i++) {
1450+
uint64_t *sizes;
1451+
1452+
REFTABLE_CALLOC_ARRAY(sizes, st->merged->stack_len);
1453+
1454+
for (size_t i = 0; i < st->merged->stack_len; i++)
13141455
sizes[i] = st->readers[i]->size - overhead;
1315-
}
1456+
13161457
return sizes;
13171458
}
13181459

@@ -1325,7 +1466,7 @@ int reftable_stack_auto_compact(struct reftable_stack *st)
13251466
reftable_free(sizes);
13261467
if (segment_size(&seg) > 0)
13271468
return stack_compact_range_stats(st, seg.start, seg.end - 1,
1328-
NULL);
1469+
NULL, STACK_COMPACT_RANGE_BEST_EFFORT);
13291470

13301471
return 0;
13311472
}

0 commit comments

Comments
 (0)