@@ -66,6 +66,7 @@ int reftable_new_stack(struct reftable_stack **dest, const char *dir,
66
66
strbuf_addstr (& list_file_name , "/tables.list" );
67
67
68
68
p -> list_file = strbuf_detach (& list_file_name , NULL );
69
+ p -> list_fd = -1 ;
69
70
p -> reftable_dir = xstrdup (dir );
70
71
p -> config = config ;
71
72
@@ -175,6 +176,12 @@ void reftable_stack_destroy(struct reftable_stack *st)
175
176
st -> readers_len = 0 ;
176
177
FREE_AND_NULL (st -> readers );
177
178
}
179
+
180
+ if (st -> list_fd >= 0 ) {
181
+ close (st -> list_fd );
182
+ st -> list_fd = -1 ;
183
+ }
184
+
178
185
FREE_AND_NULL (st -> list_file );
179
186
FREE_AND_NULL (st -> reftable_dir );
180
187
reftable_free (st );
@@ -304,69 +311,134 @@ static int tv_cmp(struct timeval *a, struct timeval *b)
304
311
static int reftable_stack_reload_maybe_reuse (struct reftable_stack * st ,
305
312
int reuse_open )
306
313
{
307
- struct timeval deadline = { 0 } ;
308
- int err = gettimeofday ( & deadline , NULL ) ;
314
+ char * * names = NULL , * * names_after = NULL ;
315
+ struct timeval deadline ;
309
316
int64_t delay = 0 ;
310
- int tries = 0 ;
311
- if (err < 0 )
312
- return err ;
317
+ int tries = 0 , err ;
318
+ int fd = -1 ;
313
319
320
+ err = gettimeofday (& deadline , NULL );
321
+ if (err < 0 )
322
+ goto out ;
314
323
deadline .tv_sec += 3 ;
324
+
315
325
while (1 ) {
316
- char * * names = NULL ;
317
- char * * names_after = NULL ;
318
- struct timeval now = { 0 };
319
- int err = gettimeofday (& now , NULL );
320
- int err2 = 0 ;
321
- if (err < 0 ) {
322
- return err ;
323
- }
326
+ struct timeval now ;
324
327
325
- /* Only look at deadlines after the first few times. This
326
- simplifies debugging in GDB */
328
+ err = gettimeofday (& now , NULL );
329
+ if (err < 0 )
330
+ goto out ;
331
+
332
+ /*
333
+ * Only look at deadlines after the first few times. This
334
+ * simplifies debugging in GDB.
335
+ */
327
336
tries ++ ;
328
- if (tries > 3 && tv_cmp (& now , & deadline ) >= 0 ) {
329
- break ;
330
- }
337
+ if (tries > 3 && tv_cmp (& now , & deadline ) >= 0 )
338
+ goto out ;
331
339
332
- err = read_lines (st -> list_file , & names );
333
- if (err < 0 ) {
334
- free_names (names );
335
- return err ;
336
- }
337
- err = reftable_stack_reload_once (st , names , reuse_open );
338
- if (err == 0 ) {
339
- free_names (names );
340
- break ;
341
- }
342
- if (err != REFTABLE_NOT_EXIST_ERROR ) {
343
- free_names (names );
344
- return err ;
345
- }
340
+ fd = open (st -> list_file , O_RDONLY );
341
+ if (fd < 0 ) {
342
+ if (errno != ENOENT ) {
343
+ err = REFTABLE_IO_ERROR ;
344
+ goto out ;
345
+ }
346
346
347
- /* err == REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent
348
- writer. Check if there was one by checking if the name list
349
- changed.
350
- */
351
- err2 = read_lines (st -> list_file , & names_after );
352
- if (err2 < 0 ) {
353
- free_names (names );
354
- return err2 ;
347
+ names = reftable_calloc (sizeof (char * ));
348
+ } else {
349
+ err = fd_read_lines (fd , & names );
350
+ if (err < 0 )
351
+ goto out ;
355
352
}
356
353
354
+ err = reftable_stack_reload_once (st , names , reuse_open );
355
+ if (!err )
356
+ break ;
357
+ if (err != REFTABLE_NOT_EXIST_ERROR )
358
+ goto out ;
359
+
360
+ /*
361
+ * REFTABLE_NOT_EXIST_ERROR can be caused by a concurrent
362
+ * writer. Check if there was one by checking if the name list
363
+ * changed.
364
+ */
365
+ err = read_lines (st -> list_file , & names_after );
366
+ if (err < 0 )
367
+ goto out ;
357
368
if (names_equal (names_after , names )) {
358
- free_names (names );
359
- free_names (names_after );
360
- return err ;
369
+ err = REFTABLE_NOT_EXIST_ERROR ;
370
+ goto out ;
361
371
}
372
+
362
373
free_names (names );
374
+ names = NULL ;
363
375
free_names (names_after );
376
+ names_after = NULL ;
377
+ close (fd );
378
+ fd = -1 ;
364
379
365
380
delay = delay + (delay * rand ()) / RAND_MAX + 1 ;
366
381
sleep_millisec (delay );
367
382
}
368
383
369
- return 0 ;
384
+ out :
385
+ /*
386
+ * Invalidate the stat cache. It is sufficient to only close the file
387
+ * descriptor and keep the cached stat info because we never use the
388
+ * latter when the former is negative.
389
+ */
390
+ if (st -> list_fd >= 0 ) {
391
+ close (st -> list_fd );
392
+ st -> list_fd = -1 ;
393
+ }
394
+
395
+ /*
396
+ * Cache stat information in case it provides a useful signal to us.
397
+ * According to POSIX, "The st_ino and st_dev fields taken together
398
+ * uniquely identify the file within the system." That being said,
399
+ * Windows is not POSIX compliant and we do not have these fields
400
+ * available. So the information we have there is insufficient to
401
+ * determine whether two file descriptors point to the same file.
402
+ *
403
+ * While we could fall back to using other signals like the file's
404
+ * mtime, those are not sufficient to avoid races. We thus refrain from
405
+ * using the stat cache on such systems and fall back to the secondary
406
+ * caching mechanism, which is to check whether contents of the file
407
+ * have changed.
408
+ *
409
+ * On other systems which are POSIX compliant we must keep the file
410
+ * descriptor open. This is to avoid a race condition where two
411
+ * processes access the reftable stack at the same point in time:
412
+ *
413
+ * 1. A reads the reftable stack and caches its stat info.
414
+ *
415
+ * 2. B updates the stack, appending a new table to "tables.list".
416
+ * This will both use a new inode and result in a different file
417
+ * size, thus invalidating A's cache in theory.
418
+ *
419
+ * 3. B decides to auto-compact the stack and merges two tables. The
420
+ * file size now matches what A has cached again. Furthermore, the
421
+ * filesystem may decide to recycle the inode number of the file
422
+ * we have replaced in (2) because it is not in use anymore.
423
+ *
424
+ * 4. A reloads the reftable stack. Neither the inode number nor the
425
+ * file size changed. If the timestamps did not change either then
426
+ * we think the cached copy of our stack is up-to-date.
427
+ *
428
+ * By keeping the file descriptor open the inode number cannot be
429
+ * recycled, mitigating the race.
430
+ */
431
+ if (!err && fd >= 0 && !fstat (fd , & st -> list_st ) &&
432
+ st -> list_st .st_dev && st -> list_st .st_ino ) {
433
+ st -> list_fd = fd ;
434
+ fd = -1 ;
435
+ }
436
+
437
+ if (fd >= 0 )
438
+ close (fd );
439
+ free_names (names );
440
+ free_names (names_after );
441
+ return err ;
370
442
}
371
443
372
444
/* -1 = error
@@ -375,8 +447,44 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
375
447
static int stack_uptodate (struct reftable_stack * st )
376
448
{
377
449
char * * names = NULL ;
378
- int err = read_lines ( st -> list_file , & names ) ;
450
+ int err ;
379
451
int i = 0 ;
452
+
453
+ /*
454
+ * When we have cached stat information available then we use it to
455
+ * verify whether the file has been rewritten.
456
+ *
457
+ * Note that we explicitly do not want to use `stat_validity_check()`
458
+ * and friends here because they may end up not comparing the `st_dev`
459
+ * and `st_ino` fields. These functions thus cannot guarantee that we
460
+ * indeed still have the same file.
461
+ */
462
+ if (st -> list_fd >= 0 ) {
463
+ struct stat list_st ;
464
+
465
+ if (stat (st -> list_file , & list_st ) < 0 ) {
466
+ /*
467
+ * It's fine for "tables.list" to not exist. In that
468
+ * case, we have to refresh when the loaded stack has
469
+ * any readers.
470
+ */
471
+ if (errno == ENOENT )
472
+ return !!st -> readers_len ;
473
+ return REFTABLE_IO_ERROR ;
474
+ }
475
+
476
+ /*
477
+ * When "tables.list" refers to the same file we can assume
478
+ * that it didn't change. This is because we always use
479
+ * rename(3P) to update the file and never write to it
480
+ * directly.
481
+ */
482
+ if (st -> list_st .st_dev == list_st .st_dev &&
483
+ st -> list_st .st_ino == list_st .st_ino )
484
+ return 0 ;
485
+ }
486
+
487
+ err = read_lines (st -> list_file , & names );
380
488
if (err < 0 )
381
489
return err ;
382
490
@@ -559,7 +667,7 @@ int reftable_addition_commit(struct reftable_addition *add)
559
667
add -> new_tables = NULL ;
560
668
add -> new_tables_len = 0 ;
561
669
562
- err = reftable_stack_reload (add -> stack );
670
+ err = reftable_stack_reload_maybe_reuse (add -> stack , 1 );
563
671
if (err )
564
672
goto done ;
565
673
0 commit comments