
Commit db38d30

Christoph Hellwig authored and kawasaki committed
xfs: rework zone GC buffer management
The double buffering where just one scratch area is used at a time does not efficiently use the available memory. It was originally implemented when GC I/O could happen out of order, but that was removed before upstream submission to avoid fragmentation. Now that all GC I/Os are processed in order, just use a number of buffers as a simple ring buffer.

For a synthetic benchmark that fills 256MiB HDD zones and punches out holes to free half the space, this leads to a decrease of GC time by a little more than 25%.

Thanks to Hans Holmberg <[email protected]> for testing and benchmarking.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Hans Holmberg <[email protected]>
Reviewed-by: Damien Le Moal <[email protected]>
1 parent 1af4203 commit db38d30
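
For illustration, here is a minimal user-space sketch of the head/tail ring-buffer accounting the commit message describes, using plain heap buffers in place of folios. Every name in it (struct gc_ring, ring_available, ring_fill, ring_free, BUF_SIZE, NR_BUFS) is invented for this sketch and does not appear in the kernel code in the diff below; only the arithmetic mirrors the new scratch_head/scratch_tail handling.

/*
 * Toy model of the GC scratch ring: NR_BUFS equal-sized buffers addressed
 * through a single head/tail pair.  The data buffers themselves are never
 * written here; the sketch only exercises the space accounting.
 */
#include <stdio.h>
#include <stdlib.h>

#define BUF_SIZE	(1u << 20)	/* 1 MiB per scratch buffer */
#define NR_BUFS		2

struct gc_ring {
	unsigned char	*bufs[NR_BUFS];
	unsigned int	size;		/* BUF_SIZE * NR_BUFS */
	unsigned int	head;		/* next byte to fill */
	unsigned int	tail;		/* next byte to free */
};

/* Space left for new GC reads, same case analysis as the diff's helper. */
static unsigned int ring_available(const struct gc_ring *r)
{
	if (!r->tail)
		return r->size - r->head;
	if (!r->head)
		return r->tail;
	return (r->size - r->head) + r->tail;
}

/* Reserve len bytes at the head; callers check ring_available() first. */
static void ring_fill(struct gc_ring *r, unsigned int len)
{
	r->head = (r->head + len) % r->size;
}

/* Release len bytes at the tail once a chunk has been written out. */
static void ring_free(struct gc_ring *r, unsigned int len)
{
	r->tail = (r->tail + len) % r->size;
}

int main(void)
{
	struct gc_ring r = { .size = BUF_SIZE * NR_BUFS };
	int i;

	for (i = 0; i < NR_BUFS; i++)
		r.bufs[i] = malloc(BUF_SIZE);

	printf("available: %u\n", ring_available(&r));	/* 2 MiB */
	ring_fill(&r, 3 * BUF_SIZE / 2);		/* queue a 1.5 MiB chunk */
	printf("available: %u\n", ring_available(&r));	/* 512 KiB */
	ring_free(&r, 3 * BUF_SIZE / 2);		/* that chunk completed */
	printf("available: %u\n", ring_available(&r));	/* 2 MiB again */

	for (i = 0; i < NR_BUFS; i++)
		free(r.bufs[i]);
	return 0;
}

Because GC I/Os now complete in order, freeing space never needs to know which buffer a chunk landed in; advancing the tail by the chunk length is all the bookkeeping required, which is what removes the old per-scratchpad offset/freed tracking.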

File tree

1 file changed: +59 -47 lines changed


fs/xfs/xfs_zone_gc.c

Lines changed: 59 additions & 47 deletions
@@ -50,23 +50,11 @@
  */
 
 /*
- * Size of each GC scratch pad.  This is also the upper bound for each
- * GC I/O, which helps to keep latency down.
+ * Size of each GC scratch allocation, and the number of buffers.
  */
-#define XFS_GC_CHUNK_SIZE	SZ_1M
-
-/*
- * Scratchpad data to read GCed data into.
- *
- * The offset member tracks where the next allocation starts, and freed tracks
- * the amount of space that is not used anymore.
- */
-#define XFS_ZONE_GC_NR_SCRATCH	2
-struct xfs_zone_scratch {
-	struct folio		*folio;
-	unsigned int		offset;
-	unsigned int		freed;
-};
+#define XFS_GC_BUF_SIZE		SZ_1M
+#define XFS_GC_NR_BUFS		2
+static_assert(XFS_GC_NR_BUFS < BIO_MAX_VECS);
 
 /*
  * Chunk that is read and written for each GC operation.
@@ -141,10 +129,14 @@ struct xfs_zone_gc_data {
 	struct bio_set		bio_set;
 
 	/*
-	 * Scratchpad used, and index to indicated which one is used.
+	 * Scratchpad to buffer GC data, organized as a ring buffer over
+	 * discontiguous folios.  scratch_head is where the buffer is filled,
+	 * and scratch_tail tracks the buffer space freed.
 	 */
-	struct xfs_zone_scratch	scratch[XFS_ZONE_GC_NR_SCRATCH];
-	unsigned int		scratch_idx;
+	struct folio		*scratch_folios[XFS_GC_NR_BUFS];
+	unsigned int		scratch_size;
+	unsigned int		scratch_head;
+	unsigned int		scratch_tail;
 
 	/*
 	 * List of bios currently being read, written and reset.
@@ -210,20 +202,16 @@ xfs_zone_gc_data_alloc(
 	if (!data->iter.recs)
 		goto out_free_data;
 
-	/*
-	 * We actually only need a single bio_vec.  It would be nice to have
-	 * a flag that only allocates the inline bvecs and not the separate
-	 * bvec pool.
-	 */
 	if (bioset_init(&data->bio_set, 16, offsetof(struct xfs_gc_bio, bio),
 			BIOSET_NEED_BVECS))
 		goto out_free_recs;
-	for (i = 0; i < XFS_ZONE_GC_NR_SCRATCH; i++) {
-		data->scratch[i].folio =
-			folio_alloc(GFP_KERNEL, get_order(XFS_GC_CHUNK_SIZE));
-		if (!data->scratch[i].folio)
+	for (i = 0; i < XFS_GC_NR_BUFS; i++) {
+		data->scratch_folios[i] =
+			folio_alloc(GFP_KERNEL, get_order(XFS_GC_BUF_SIZE));
+		if (!data->scratch_folios[i])
 			goto out_free_scratch;
 	}
+	data->scratch_size = XFS_GC_BUF_SIZE * XFS_GC_NR_BUFS;
 	INIT_LIST_HEAD(&data->reading);
 	INIT_LIST_HEAD(&data->writing);
 	INIT_LIST_HEAD(&data->resetting);
@@ -232,7 +220,7 @@ xfs_zone_gc_data_alloc(
 
 out_free_scratch:
 	while (--i >= 0)
-		folio_put(data->scratch[i].folio);
+		folio_put(data->scratch_folios[i]);
 	bioset_exit(&data->bio_set);
 out_free_recs:
 	kfree(data->iter.recs);
@@ -247,8 +235,8 @@ xfs_zone_gc_data_free(
 {
 	int			i;
 
-	for (i = 0; i < XFS_ZONE_GC_NR_SCRATCH; i++)
-		folio_put(data->scratch[i].folio);
+	for (i = 0; i < XFS_GC_NR_BUFS; i++)
+		folio_put(data->scratch_folios[i]);
 	bioset_exit(&data->bio_set);
 	kfree(data->iter.recs);
 	kfree(data);
@@ -590,7 +578,12 @@ static unsigned int
 xfs_zone_gc_scratch_available(
 	struct xfs_zone_gc_data	*data)
 {
-	return XFS_GC_CHUNK_SIZE - data->scratch[data->scratch_idx].offset;
+	if (!data->scratch_tail)
+		return data->scratch_size - data->scratch_head;
+
+	if (!data->scratch_head)
+		return data->scratch_tail;
+	return (data->scratch_size - data->scratch_head) + data->scratch_tail;
 }
 
 static bool
@@ -664,6 +657,28 @@ xfs_zone_gc_alloc_blocks(
 	return oz;
 }
 
+static void
+xfs_zone_gc_add_data(
+	struct xfs_gc_bio	*chunk)
+{
+	struct xfs_zone_gc_data	*data = chunk->data;
+	unsigned int		len = chunk->len;
+	unsigned int		off = data->scratch_head;
+
+	do {
+		unsigned int this_off = off % XFS_GC_BUF_SIZE;
+		unsigned int this_len = min(len, XFS_GC_BUF_SIZE - this_off);
+
+		bio_add_folio_nofail(&chunk->bio,
+				data->scratch_folios[off / XFS_GC_BUF_SIZE],
+				this_len, this_off);
+		len -= this_len;
+		off += this_len;
+		if (off == data->scratch_size)
+			off = 0;
+	} while (len);
+}
+
 static bool
 xfs_zone_gc_start_chunk(
 	struct xfs_zone_gc_data	*data)
@@ -677,6 +692,7 @@ xfs_zone_gc_start_chunk(
 	struct xfs_inode	*ip;
 	struct bio		*bio;
 	xfs_daddr_t		daddr;
+	unsigned int		len;
 	bool			is_seq;
 
 	if (xfs_is_shutdown(mp))
@@ -691,17 +707,19 @@ xfs_zone_gc_start_chunk(
 		return false;
 	}
 
-	bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ, GFP_NOFS, &data->bio_set);
+	len = XFS_FSB_TO_B(mp, irec.rm_blockcount);
+	bio = bio_alloc_bioset(bdev,
+			min(howmany(len, XFS_GC_BUF_SIZE) + 1, XFS_GC_NR_BUFS),
+			REQ_OP_READ, GFP_NOFS, &data->bio_set);
 
 	chunk = container_of(bio, struct xfs_gc_bio, bio);
 	chunk->ip = ip;
 	chunk->offset = XFS_FSB_TO_B(mp, irec.rm_offset);
-	chunk->len = XFS_FSB_TO_B(mp, irec.rm_blockcount);
+	chunk->len = len;
 	chunk->old_startblock =
 		xfs_rgbno_to_rtb(iter->victim_rtg, irec.rm_startblock);
 	chunk->new_daddr = daddr;
 	chunk->is_seq = is_seq;
-	chunk->scratch = &data->scratch[data->scratch_idx];
 	chunk->data = data;
 	chunk->oz = oz;
 	chunk->victim_rtg = iter->victim_rtg;
@@ -710,13 +728,9 @@ xfs_zone_gc_start_chunk(
 
 	bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
 	bio->bi_end_io = xfs_zone_gc_end_io;
-	bio_add_folio_nofail(bio, chunk->scratch->folio, chunk->len,
-			chunk->scratch->offset);
-	chunk->scratch->offset += chunk->len;
-	if (chunk->scratch->offset == XFS_GC_CHUNK_SIZE) {
-		data->scratch_idx =
-			(data->scratch_idx + 1) % XFS_ZONE_GC_NR_SCRATCH;
-	}
+	xfs_zone_gc_add_data(chunk);
+	data->scratch_head = (data->scratch_head + len) % data->scratch_size;
+
 	WRITE_ONCE(chunk->state, XFS_GC_BIO_NEW);
 	list_add_tail(&chunk->entry, &data->reading);
 	xfs_zone_gc_iter_advance(iter, irec.rm_blockcount);
@@ -834,6 +848,7 @@ xfs_zone_gc_finish_chunk(
 	struct xfs_gc_bio	*chunk)
 {
 	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
+	struct xfs_zone_gc_data	*data = chunk->data;
 	struct xfs_inode	*ip = chunk->ip;
 	struct xfs_mount	*mp = ip->i_mount;
 	int			error;
@@ -845,11 +860,8 @@ xfs_zone_gc_finish_chunk(
 		return;
 	}
 
-	chunk->scratch->freed += chunk->len;
-	if (chunk->scratch->freed == chunk->scratch->offset) {
-		chunk->scratch->offset = 0;
-		chunk->scratch->freed = 0;
-	}
+	data->scratch_tail =
+		(data->scratch_tail + chunk->len) % data->scratch_size;
 
 	/*
	 * Cycle through the iolock and wait for direct I/O and layouts to

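To complement the diff, here is a similarly hypothetical user-space sketch of just the splitting loop in the new xfs_zone_gc_add_data(): a chunk that starts near the end of one scratch buffer is cut at the buffer boundary and reported as one segment per buffer touched. BUF_SIZE and NR_BUFS are the same invented constants as in the earlier sketch; where the kernel adds each segment to the bio with bio_add_folio_nofail(), this sketch only prints it.

#include <stdio.h>

#define BUF_SIZE	(1u << 20)	/* 1 MiB, analogous to XFS_GC_BUF_SIZE */
#define NR_BUFS		2		/* analogous to XFS_GC_NR_BUFS */
#define RING_SIZE	(BUF_SIZE * NR_BUFS)

/*
 * Cut a chunk of len bytes starting at ring offset off into per-buffer
 * segments, wrapping at the end of the ring, and print each segment.
 */
static void split_chunk(unsigned int off, unsigned int len)
{
	do {
		unsigned int this_off = off % BUF_SIZE;
		unsigned int this_len = len < BUF_SIZE - this_off ?
					len : BUF_SIZE - this_off;

		printf("buf %u: off %u len %u\n",
		       off / BUF_SIZE, this_off, this_len);

		len -= this_len;
		off += this_len;
		if (off == RING_SIZE)
			off = 0;	/* wrap around to the first buffer */
	} while (len);
}

int main(void)
{
	/*
	 * A 512 KiB chunk starting 768 KiB into the ring crosses the
	 * buffer boundary and comes out as two segments, one per buffer.
	 */
	split_chunk(768 * 1024, 512 * 1024);
	return 0;
}

Running the sketch prints two segments, one in each buffer.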