Skip to content

Commit 956f1b8

Browse files
committed
Merge tag 'rmap-speedups-5.19_2022-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-5.19-for-next
xfs: fix rmap inefficiencies Reduce the performance impact of the reverse mapping btree when reflink is enabled by using the much faster non-overlapped btree lookup functions when we're searching the rmap index with a fully specified key. If we find the exact record we're looking for, great! We don't have to perform the full overlapped scan. For filesystems with high sharing factors this reduces the xfs_scrub runtime by a good 15%. This has been shown to reduce the fstests runtime for realtime rmap configurations by 30%, since the lack of AGs severely limits scalability. Signed-off-by: Dave Chinner <[email protected]>
2 parents 5e116e9 + 1edf805 commit 956f1b8

File tree

4 files changed

+106
-91
lines changed

4 files changed

+106
-91
lines changed

fs/xfs/libxfs/xfs_rmap.c

Lines changed: 96 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,32 @@ int
3434
xfs_rmap_lookup_le(
3535
struct xfs_btree_cur *cur,
3636
xfs_agblock_t bno,
37-
xfs_extlen_t len,
3837
uint64_t owner,
3938
uint64_t offset,
4039
unsigned int flags,
40+
struct xfs_rmap_irec *irec,
4141
int *stat)
4242
{
43+
int get_stat = 0;
44+
int error;
45+
4346
cur->bc_rec.r.rm_startblock = bno;
44-
cur->bc_rec.r.rm_blockcount = len;
47+
cur->bc_rec.r.rm_blockcount = 0;
4548
cur->bc_rec.r.rm_owner = owner;
4649
cur->bc_rec.r.rm_offset = offset;
4750
cur->bc_rec.r.rm_flags = flags;
48-
return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
51+
52+
error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
53+
if (error || !(*stat) || !irec)
54+
return error;
55+
56+
error = xfs_rmap_get_rec(cur, irec, &get_stat);
57+
if (error)
58+
return error;
59+
if (!get_stat)
60+
return -EFSCORRUPTED;
61+
62+
return 0;
4963
}
5064

5165
/*
@@ -251,7 +265,6 @@ xfs_rmap_get_rec(
251265
struct xfs_find_left_neighbor_info {
252266
struct xfs_rmap_irec high;
253267
struct xfs_rmap_irec *irec;
254-
int *stat;
255268
};
256269

257270
/* For each rmap given, figure out if it matches the key we want. */
@@ -276,7 +289,6 @@ xfs_rmap_find_left_neighbor_helper(
276289
return 0;
277290

278291
*info->irec = *rec;
279-
*info->stat = 1;
280292
return -ECANCELED;
281293
}
282294

@@ -285,7 +297,7 @@ xfs_rmap_find_left_neighbor_helper(
285297
* return a match with the same owner and adjacent physical and logical
286298
* block ranges.
287299
*/
288-
int
300+
STATIC int
289301
xfs_rmap_find_left_neighbor(
290302
struct xfs_btree_cur *cur,
291303
xfs_agblock_t bno,
@@ -296,6 +308,7 @@ xfs_rmap_find_left_neighbor(
296308
int *stat)
297309
{
298310
struct xfs_find_left_neighbor_info info;
311+
int found = 0;
299312
int error;
300313

301314
*stat = 0;
@@ -313,21 +326,44 @@ xfs_rmap_find_left_neighbor(
313326
info.high.rm_flags = flags;
314327
info.high.rm_blockcount = 0;
315328
info.irec = irec;
316-
info.stat = stat;
317329

318330
trace_xfs_rmap_find_left_neighbor_query(cur->bc_mp,
319331
cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags);
320332

321-
error = xfs_rmap_query_range(cur, &info.high, &info.high,
322-
xfs_rmap_find_left_neighbor_helper, &info);
323-
if (error == -ECANCELED)
324-
error = 0;
325-
if (*stat)
326-
trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
327-
cur->bc_ag.pag->pag_agno, irec->rm_startblock,
328-
irec->rm_blockcount, irec->rm_owner,
329-
irec->rm_offset, irec->rm_flags);
330-
return error;
333+
/*
334+
* Historically, we always used the range query to walk every reverse
335+
* mapping that could possibly overlap the key that the caller asked
336+
* for, and filter out the ones that don't. That is very slow when
337+
* there are a lot of records.
338+
*
339+
* However, there are two scenarios where the classic btree search can
340+
* produce correct results -- if the index contains a record that is an
341+
* exact match for the lookup key; and if there are no other records
342+
* between the record we want and the key we supplied.
343+
*
344+
* As an optimization, try a non-overlapped lookup first. This makes
345+
* extent conversion and remap operations run a bit faster if the
346+
* physical extents aren't being shared. If we don't find what we
347+
* want, we fall back to the overlapped query.
348+
*/
349+
error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec,
350+
&found);
351+
if (error)
352+
return error;
353+
if (found)
354+
error = xfs_rmap_find_left_neighbor_helper(cur, irec, &info);
355+
if (!error)
356+
error = xfs_rmap_query_range(cur, &info.high, &info.high,
357+
xfs_rmap_find_left_neighbor_helper, &info);
358+
if (error != -ECANCELED)
359+
return error;
360+
361+
*stat = 1;
362+
trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
363+
cur->bc_ag.pag->pag_agno, irec->rm_startblock,
364+
irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
365+
irec->rm_flags);
366+
return 0;
331367
}
332368

333369
/* For each rmap given, figure out if it matches the key we want. */
@@ -353,7 +389,6 @@ xfs_rmap_lookup_le_range_helper(
353389
return 0;
354390

355391
*info->irec = *rec;
356-
*info->stat = 1;
357392
return -ECANCELED;
358393
}
359394

@@ -374,6 +409,7 @@ xfs_rmap_lookup_le_range(
374409
int *stat)
375410
{
376411
struct xfs_find_left_neighbor_info info;
412+
int found = 0;
377413
int error;
378414

379415
info.high.rm_startblock = bno;
@@ -386,20 +422,44 @@ xfs_rmap_lookup_le_range(
386422
info.high.rm_blockcount = 0;
387423
*stat = 0;
388424
info.irec = irec;
389-
info.stat = stat;
390425

391-
trace_xfs_rmap_lookup_le_range(cur->bc_mp,
392-
cur->bc_ag.pag->pag_agno, bno, 0, owner, offset, flags);
393-
error = xfs_rmap_query_range(cur, &info.high, &info.high,
394-
xfs_rmap_lookup_le_range_helper, &info);
395-
if (error == -ECANCELED)
396-
error = 0;
397-
if (*stat)
398-
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
399-
cur->bc_ag.pag->pag_agno, irec->rm_startblock,
400-
irec->rm_blockcount, irec->rm_owner,
401-
irec->rm_offset, irec->rm_flags);
402-
return error;
426+
trace_xfs_rmap_lookup_le_range(cur->bc_mp, cur->bc_ag.pag->pag_agno,
427+
bno, 0, owner, offset, flags);
428+
429+
/*
430+
* Historically, we always used the range query to walk every reverse
431+
* mapping that could possibly overlap the key that the caller asked
432+
* for, and filter out the ones that don't. That is very slow when
433+
* there are a lot of records.
434+
*
435+
* However, there are two scenarios where the classic btree search can
436+
* produce correct results -- if the index contains a record that is an
437+
* exact match for the lookup key; and if there are no other records
438+
* between the record we want and the key we supplied.
439+
*
440+
* As an optimization, try a non-overlapped lookup first. This makes
441+
* scrub run much faster on most filesystems because bmbt records are
442+
* usually an exact match for rmap records. If we don't find what we
443+
* want, we fall back to the overlapped query.
444+
*/
445+
error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, irec,
446+
&found);
447+
if (error)
448+
return error;
449+
if (found)
450+
error = xfs_rmap_lookup_le_range_helper(cur, irec, &info);
451+
if (!error)
452+
error = xfs_rmap_query_range(cur, &info.high, &info.high,
453+
xfs_rmap_lookup_le_range_helper, &info);
454+
if (error != -ECANCELED)
455+
return error;
456+
457+
*stat = 1;
458+
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
459+
cur->bc_ag.pag->pag_agno, irec->rm_startblock,
460+
irec->rm_blockcount, irec->rm_owner, irec->rm_offset,
461+
irec->rm_flags);
462+
return 0;
403463
}
404464

405465
/*
@@ -510,21 +570,14 @@ xfs_rmap_unmap(
510570
* for the AG headers at rm_startblock == 0 created by mkfs/growfs that
511571
* will not ever be removed from the tree.
512572
*/
513-
error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, &i);
573+
error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &ltrec, &i);
514574
if (error)
515575
goto out_error;
516576
if (XFS_IS_CORRUPT(mp, i != 1)) {
517577
error = -EFSCORRUPTED;
518578
goto out_error;
519579
}
520580

521-
error = xfs_rmap_get_rec(cur, &ltrec, &i);
522-
if (error)
523-
goto out_error;
524-
if (XFS_IS_CORRUPT(mp, i != 1)) {
525-
error = -EFSCORRUPTED;
526-
goto out_error;
527-
}
528581
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
529582
cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
530583
ltrec.rm_blockcount, ltrec.rm_owner,
@@ -786,18 +839,11 @@ xfs_rmap_map(
786839
* record for our insertion point. This will also give us the record for
787840
* start block contiguity tests.
788841
*/
789-
error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags,
842+
error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &ltrec,
790843
&have_lt);
791844
if (error)
792845
goto out_error;
793846
if (have_lt) {
794-
error = xfs_rmap_get_rec(cur, &ltrec, &have_lt);
795-
if (error)
796-
goto out_error;
797-
if (XFS_IS_CORRUPT(mp, have_lt != 1)) {
798-
error = -EFSCORRUPTED;
799-
goto out_error;
800-
}
801847
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
802848
cur->bc_ag.pag->pag_agno, ltrec.rm_startblock,
803849
ltrec.rm_blockcount, ltrec.rm_owner,
@@ -1022,21 +1068,14 @@ xfs_rmap_convert(
10221068
* record for our insertion point. This will also give us the record for
10231069
* start block contiguity tests.
10241070
*/
1025-
error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
1071+
error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, &PREV, &i);
10261072
if (error)
10271073
goto done;
10281074
if (XFS_IS_CORRUPT(mp, i != 1)) {
10291075
error = -EFSCORRUPTED;
10301076
goto done;
10311077
}
10321078

1033-
error = xfs_rmap_get_rec(cur, &PREV, &i);
1034-
if (error)
1035-
goto done;
1036-
if (XFS_IS_CORRUPT(mp, i != 1)) {
1037-
error = -EFSCORRUPTED;
1038-
goto done;
1039-
}
10401079
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
10411080
cur->bc_ag.pag->pag_agno, PREV.rm_startblock,
10421081
PREV.rm_blockcount, PREV.rm_owner,
@@ -1140,7 +1179,7 @@ xfs_rmap_convert(
11401179
_RET_IP_);
11411180

11421181
/* reset the cursor back to PREV */
1143-
error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
1182+
error = xfs_rmap_lookup_le(cur, bno, owner, offset, oldext, NULL, &i);
11441183
if (error)
11451184
goto done;
11461185
if (XFS_IS_CORRUPT(mp, i != 1)) {
@@ -2677,7 +2716,7 @@ xfs_rmap_record_exists(
26772716
ASSERT(XFS_RMAP_NON_INODE_OWNER(owner) ||
26782717
(flags & XFS_RMAP_BMBT_BLOCK));
26792718

2680-
error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags,
2719+
error = xfs_rmap_lookup_le(cur, bno, owner, offset, flags, &irec,
26812720
&has_record);
26822721
if (error)
26832722
return error;
@@ -2686,14 +2725,6 @@ xfs_rmap_record_exists(
26862725
return 0;
26872726
}
26882727

2689-
error = xfs_rmap_get_rec(cur, &irec, &has_record);
2690-
if (error)
2691-
return error;
2692-
if (!has_record) {
2693-
*has_rmap = false;
2694-
return 0;
2695-
}
2696-
26972728
*has_rmap = (irec.rm_owner == owner && irec.rm_startblock <= bno &&
26982729
irec.rm_startblock + irec.rm_blockcount >= bno + len);
26992730
return 0;

fs/xfs/libxfs/xfs_rmap.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,8 @@ int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp,
122122
const struct xfs_owner_info *oinfo);
123123

124124
int xfs_rmap_lookup_le(struct xfs_btree_cur *cur, xfs_agblock_t bno,
125-
xfs_extlen_t len, uint64_t owner, uint64_t offset,
126-
unsigned int flags, int *stat);
125+
uint64_t owner, uint64_t offset, unsigned int flags,
126+
struct xfs_rmap_irec *irec, int *stat);
127127
int xfs_rmap_lookup_eq(struct xfs_btree_cur *cur, xfs_agblock_t bno,
128128
xfs_extlen_t len, uint64_t owner, uint64_t offset,
129129
unsigned int flags, int *stat);
@@ -184,9 +184,6 @@ int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type,
184184
xfs_fsblock_t startblock, xfs_filblks_t blockcount,
185185
xfs_exntst_t state, struct xfs_btree_cur **pcur);
186186

187-
int xfs_rmap_find_left_neighbor(struct xfs_btree_cur *cur, xfs_agblock_t bno,
188-
uint64_t owner, uint64_t offset, unsigned int flags,
189-
struct xfs_rmap_irec *irec, int *stat);
190187
int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
191188
uint64_t owner, uint64_t offset, unsigned int flags,
192189
struct xfs_rmap_irec *irec, int *stat);

fs/xfs/scrub/bmap.c

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -133,29 +133,13 @@ xchk_bmap_get_rmap(
133133
if (info->is_shared) {
134134
error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
135135
owner, offset, rflags, rmap, &has_rmap);
136-
if (!xchk_should_check_xref(info->sc, &error,
137-
&info->sc->sa.rmap_cur))
138-
return false;
139-
goto out;
136+
} else {
137+
error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
138+
owner, offset, rflags, rmap, &has_rmap);
140139
}
141-
142-
/*
143-
* Otherwise, use the (faster) regular lookup.
144-
*/
145-
error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner,
146-
offset, rflags, &has_rmap);
147-
if (!xchk_should_check_xref(info->sc, &error,
148-
&info->sc->sa.rmap_cur))
140+
if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
149141
return false;
150-
if (!has_rmap)
151-
goto out;
152142

153-
error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap);
154-
if (!xchk_should_check_xref(info->sc, &error,
155-
&info->sc->sa.rmap_cur))
156-
return false;
157-
158-
out:
159143
if (!has_rmap)
160144
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
161145
irec->br_startoff);

fs/xfs/xfs_trace.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
418418
__field(unsigned, lockval)
419419
__field(unsigned, flags)
420420
__field(unsigned long, caller_ip)
421+
__field(const void *, buf_ops)
421422
),
422423
TP_fast_assign(
423424
__entry->dev = bp->b_target->bt_dev;
@@ -428,16 +429,18 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
428429
__entry->lockval = bp->b_sema.count;
429430
__entry->flags = bp->b_flags;
430431
__entry->caller_ip = caller_ip;
432+
__entry->buf_ops = bp->b_ops;
431433
),
432434
TP_printk("dev %d:%d daddr 0x%llx bbcount 0x%x hold %d pincount %d "
433-
"lock %d flags %s caller %pS",
435+
"lock %d flags %s bufops %pS caller %pS",
434436
MAJOR(__entry->dev), MINOR(__entry->dev),
435437
(unsigned long long)__entry->bno,
436438
__entry->nblks,
437439
__entry->hold,
438440
__entry->pincount,
439441
__entry->lockval,
440442
__print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
443+
__entry->buf_ops,
441444
(void *)__entry->caller_ip)
442445
)
443446

0 commit comments

Comments
 (0)