Skip to content

Commit 9db0d7c

Browse files
committed
Merge tag 'xfs-fixes-6.18-rc4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs fixes from Carlos Maiolino: "Just a single bug fix (and documentation for the issue)" * tag 'xfs-fixes-6.18-rc4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: xfs: document another racy GC case in xfs_zoned_map_extent xfs: prevent gc from picking the same zone twice
2 parents cb7f9fc + 0db22d7 commit 9db0d7c

File tree

3 files changed

+41
-0
lines changed

3 files changed

+41
-0
lines changed

fs/xfs/libxfs/xfs_rtgroup.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,12 @@ struct xfs_rtgroup {
5050
uint8_t *rtg_rsum_cache;
5151
struct xfs_open_zone *rtg_open_zone;
5252
};
53+
54+
/*
55+
* Count of outstanding GC operations for zoned XFS. Any RTG with a
56+
* non-zero rtg_gccount will not be picked as new GC victim.
57+
*/
58+
atomic_t rtg_gccount;
5359
};
5460

5561
/*

fs/xfs/xfs_zone_alloc.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,14 @@ xfs_zoned_map_extent(
246246
* If a data write raced with this GC write, keep the existing data in
247247
* the data fork, mark our newly written GC extent as reclaimable, then
248248
* move on to the next extent.
249+
*
250+
* Note that this can also happen when racing with operations that do
251+
* not actually invalidate the data, but just move it to a different
252+
* inode (XFS_IOC_EXCHANGE_RANGE), or to a different offset inside the
253+
* inode (FALLOC_FL_COLLAPSE_RANGE / FALLOC_FL_INSERT_RANGE). If the
254+
* data was just moved around, GC fails to free the zone, but the zone
255+
* becomes a GC candidate again as soon as all previous GC I/O has
256+
* finished and these blocks will be moved out eventually.
249257
*/
250258
if (old_startblock != NULLFSBLOCK &&
251259
old_startblock != data.br_startblock)

fs/xfs/xfs_zone_gc.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ struct xfs_gc_bio {
114114
/* Open Zone being written to */
115115
struct xfs_open_zone *oz;
116116

117+
struct xfs_rtgroup *victim_rtg;
118+
117119
/* Bio used for reads and writes, including the bvec used by it */
118120
struct bio_vec bv;
119121
struct bio bio; /* must be last */
@@ -264,6 +266,7 @@ xfs_zone_gc_iter_init(
264266
iter->rec_count = 0;
265267
iter->rec_idx = 0;
266268
iter->victim_rtg = victim_rtg;
269+
atomic_inc(&victim_rtg->rtg_gccount);
267270
}
268271

269272
/*
@@ -362,6 +365,7 @@ xfs_zone_gc_query(
362365

363366
return 0;
364367
done:
368+
atomic_dec(&iter->victim_rtg->rtg_gccount);
365369
xfs_rtgroup_rele(iter->victim_rtg);
366370
iter->victim_rtg = NULL;
367371
return 0;
@@ -451,6 +455,20 @@ xfs_zone_gc_pick_victim_from(
451455
if (!rtg)
452456
continue;
453457

458+
/*
459+
* If the zone is already undergoing GC, don't pick it again.
460+
*
461+
* This prevents us from picking one of the zones for which we
462+
* already submitted GC I/O, but for which the remapping hasn't
463+
* concluded yet. This won't cause data corruption, but
464+
* increases write amplification and slows down GC, so this is
465+
* a bad thing.
466+
*/
467+
if (atomic_read(&rtg->rtg_gccount)) {
468+
xfs_rtgroup_rele(rtg);
469+
continue;
470+
}
471+
454472
/* skip zones that are just waiting for a reset */
455473
if (rtg_rmap(rtg)->i_used_blocks == 0 ||
456474
rtg_rmap(rtg)->i_used_blocks >= victim_used) {
@@ -688,6 +706,9 @@ xfs_zone_gc_start_chunk(
688706
chunk->scratch = &data->scratch[data->scratch_idx];
689707
chunk->data = data;
690708
chunk->oz = oz;
709+
chunk->victim_rtg = iter->victim_rtg;
710+
atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
711+
atomic_inc(&chunk->victim_rtg->rtg_gccount);
691712

692713
bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
693714
bio->bi_end_io = xfs_zone_gc_end_io;
@@ -710,6 +731,8 @@ static void
710731
xfs_zone_gc_free_chunk(
711732
struct xfs_gc_bio *chunk)
712733
{
734+
atomic_dec(&chunk->victim_rtg->rtg_gccount);
735+
xfs_rtgroup_rele(chunk->victim_rtg);
713736
list_del(&chunk->entry);
714737
xfs_open_zone_put(chunk->oz);
715738
xfs_irele(chunk->ip);
@@ -770,6 +793,10 @@ xfs_zone_gc_split_write(
770793
split_chunk->oz = chunk->oz;
771794
atomic_inc(&chunk->oz->oz_ref);
772795

796+
split_chunk->victim_rtg = chunk->victim_rtg;
797+
atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
798+
atomic_inc(&chunk->victim_rtg->rtg_gccount);
799+
773800
chunk->offset += split_len;
774801
chunk->len -= split_len;
775802
chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len);

0 commit comments

Comments
 (0)