Skip to content

Commit 496baa2

Browse files
author
Chandan Babu R
committed
Merge tag 'vectorized-scrub-6.10_2024-04-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.10-mergeC
xfs: vectorize scrub kernel calls

Create a vectorized version of the metadata scrub and repair ioctl, and adapt xfs_scrub to use that. This mitigates the impact of system call overhead on xfs_scrub runtime.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'vectorized-scrub-6.10_2024-04-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: introduce vectored scrub mode
  xfs: move xfs_ioc_scrub_metadata to scrub.c
  xfs: reduce the rate of cond_resched calls inside scrub
2 parents f7cea94 + c77b375 commit 496baa2

File tree

10 files changed

+366
-59
lines changed

10 files changed

+366
-59
lines changed

fs/xfs/libxfs/xfs_fs.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
/* Number of scrub subcommands. */
#define XFS_SCRUB_TYPE_NR	29

/*
 * This special type code only applies to the vectored scrub implementation.
 *
 * If any of the previous scrub vectors recorded runtime errors or have
 * sv_flags bits set that match the OFLAG bits in the barrier vector's
 * sv_flags, set the barrier's sv_ret to -ECANCELED and return to userspace.
 */
#define XFS_SCRUB_TYPE_BARRIER	(0xFFFFFFFF)

/* i: Repair this metadata. */
#define XFS_SCRUB_IFLAG_REPAIR		(1u << 0)

#define XFS_SCRUB_FLAGS_ALL	(XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)

/* Vectored scrub calls to reduce the number of kernel transitions. */

/* One scrub request within a vectored call; sv_ret/sv_flags filled on return. */
struct xfs_scrub_vec {
	__u32 sv_type;		/* XFS_SCRUB_TYPE_* */
	__u32 sv_flags;		/* XFS_SCRUB_FLAGS_* */
	__s32 sv_ret;		/* 0 or a negative error code */
	__u32 sv_reserved;	/* must be zero */
};

/* Vectored metadata scrub control structure. */
struct xfs_scrub_vec_head {
	__u64 svh_ino;		/* inode number. */
	__u32 svh_gen;		/* inode generation. */
	__u32 svh_agno;		/* ag number. */
	__u32 svh_flags;	/* XFS_SCRUB_VEC_FLAGS_* */
	__u16 svh_rest_us;	/* wait this much time between vector items */
	__u16 svh_nr;		/* number of svh_vectors */
	__u64 svh_reserved;	/* must be zero */
	__u64 svh_vectors;	/* pointer to buffer of xfs_scrub_vec */
};

/* No vector-head flags are currently defined. */
#define XFS_SCRUB_VEC_FLAGS_ALL	(0)

/*
 * ioctl limits
 */
#define XFS_IOC_AG_GEOMETRY		_IOWR('X', 61, struct xfs_ag_geometry)
#define XFS_IOC_GETPARENTS		_IOWR('X', 62, struct xfs_getparents)
#define XFS_IOC_GETPARENTS_BY_HANDLE	_IOWR('X', 63, struct xfs_getparents_by_handle)
/* Vectored metadata scrub; argument is a struct xfs_scrub_vec_head. */
#define XFS_IOC_SCRUBV_METADATA		_IOWR('X', 64, struct xfs_scrub_vec_head)

/*
 * ioctl commands that replace IRIX syssgi()'s
 */

fs/xfs/scrub/common.h

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,31 +6,6 @@
66
#ifndef __XFS_SCRUB_COMMON_H__
77
#define __XFS_SCRUB_COMMON_H__
88

9-
/*
10-
* We /could/ terminate a scrub/repair operation early. If we're not
11-
* in a good place to continue (fatal signal, etc.) then bail out.
12-
* Note that we're careful not to make any judgements about *error.
13-
*/
14-
static inline bool
15-
xchk_should_terminate(
16-
struct xfs_scrub *sc,
17-
int *error)
18-
{
19-
/*
20-
* If preemption is disabled, we need to yield to the scheduler every
21-
* few seconds so that we don't run afoul of the soft lockup watchdog
22-
* or RCU stall detector.
23-
*/
24-
cond_resched();
25-
26-
if (fatal_signal_pending(current)) {
27-
if (*error == 0)
28-
*error = -EINTR;
29-
return true;
30-
}
31-
return false;
32-
}
33-
349
int xchk_trans_alloc(struct xfs_scrub *sc, uint resblks);
3510
int xchk_trans_alloc_empty(struct xfs_scrub *sc);
3611
void xchk_trans_cancel(struct xfs_scrub *sc);

fs/xfs/scrub/scrub.c

Lines changed: 176 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "xfs_exchmaps.h"
2222
#include "xfs_dir2.h"
2323
#include "xfs_parent.h"
24+
#include "xfs_icache.h"
2425
#include "scrub/scrub.h"
2526
#include "scrub/common.h"
2627
#include "scrub/trace.h"
@@ -578,7 +579,7 @@ xchk_scrub_create_subord(
578579
}
579580

580581
/* Dispatch metadata scrubbing. */
581-
int
582+
STATIC int
582583
xfs_scrub_metadata(
583584
struct file *file,
584585
struct xfs_scrub_metadata *sm)
@@ -620,6 +621,7 @@ xfs_scrub_metadata(
620621
sc->sm = sm;
621622
sc->ops = &meta_scrub_ops[sm->sm_type];
622623
sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
624+
sc->relax = INIT_XCHK_RELAX;
623625
retry_op:
624626
/*
625627
* When repairs are allowed, prevent freezing or readonly remount while
@@ -723,3 +725,176 @@ xfs_scrub_metadata(
723725
run.retries++;
724726
goto retry_op;
725727
}
728+
729+
/* Scrub one aspect of one piece of metadata. */
730+
int
731+
xfs_ioc_scrub_metadata(
732+
struct file *file,
733+
void __user *arg)
734+
{
735+
struct xfs_scrub_metadata scrub;
736+
int error;
737+
738+
if (!capable(CAP_SYS_ADMIN))
739+
return -EPERM;
740+
741+
if (copy_from_user(&scrub, arg, sizeof(scrub)))
742+
return -EFAULT;
743+
744+
error = xfs_scrub_metadata(file, &scrub);
745+
if (error)
746+
return error;
747+
748+
if (copy_to_user(arg, &scrub, sizeof(scrub)))
749+
return -EFAULT;
750+
751+
return 0;
752+
}
753+
754+
/* Decide if there have been any scrub failures up to this point. */
755+
static inline int
756+
xfs_scrubv_check_barrier(
757+
struct xfs_mount *mp,
758+
const struct xfs_scrub_vec *vectors,
759+
const struct xfs_scrub_vec *stop_vec)
760+
{
761+
const struct xfs_scrub_vec *v;
762+
__u32 failmask;
763+
764+
failmask = stop_vec->sv_flags & XFS_SCRUB_FLAGS_OUT;
765+
766+
for (v = vectors; v < stop_vec; v++) {
767+
if (v->sv_type == XFS_SCRUB_TYPE_BARRIER)
768+
continue;
769+
770+
/*
771+
* Runtime errors count as a previous failure, except the ones
772+
* used to ask userspace to retry.
773+
*/
774+
switch (v->sv_ret) {
775+
case -EBUSY:
776+
case -ENOENT:
777+
case -EUSERS:
778+
case 0:
779+
break;
780+
default:
781+
return -ECANCELED;
782+
}
783+
784+
/*
785+
* If any of the out-flags on the scrub vector match the mask
786+
* that was set on the barrier vector, that's a previous fail.
787+
*/
788+
if (v->sv_flags & failmask)
789+
return -ECANCELED;
790+
}
791+
792+
return 0;
793+
}
794+
795+
/* Vectored scrub implementation to reduce ioctl calls. */
796+
int
797+
xfs_ioc_scrubv_metadata(
798+
struct file *file,
799+
void __user *arg)
800+
{
801+
struct xfs_scrub_vec_head head;
802+
struct xfs_scrub_vec_head __user *uhead = arg;
803+
struct xfs_scrub_vec *vectors;
804+
struct xfs_scrub_vec __user *uvectors;
805+
struct xfs_inode *ip_in = XFS_I(file_inode(file));
806+
struct xfs_mount *mp = ip_in->i_mount;
807+
struct xfs_scrub_vec *v;
808+
size_t vec_bytes;
809+
unsigned int i;
810+
int error = 0;
811+
812+
if (!capable(CAP_SYS_ADMIN))
813+
return -EPERM;
814+
815+
if (copy_from_user(&head, uhead, sizeof(head)))
816+
return -EFAULT;
817+
818+
if (head.svh_reserved)
819+
return -EINVAL;
820+
if (head.svh_flags & ~XFS_SCRUB_VEC_FLAGS_ALL)
821+
return -EINVAL;
822+
if (head.svh_nr == 0)
823+
return 0;
824+
825+
vec_bytes = array_size(head.svh_nr, sizeof(struct xfs_scrub_vec));
826+
if (vec_bytes > PAGE_SIZE)
827+
return -ENOMEM;
828+
829+
uvectors = (void __user *)(uintptr_t)head.svh_vectors;
830+
vectors = memdup_user(uvectors, vec_bytes);
831+
if (IS_ERR(vectors))
832+
return PTR_ERR(vectors);
833+
834+
trace_xchk_scrubv_start(ip_in, &head);
835+
836+
for (i = 0, v = vectors; i < head.svh_nr; i++, v++) {
837+
if (v->sv_reserved) {
838+
error = -EINVAL;
839+
goto out_free;
840+
}
841+
842+
if (v->sv_type == XFS_SCRUB_TYPE_BARRIER &&
843+
(v->sv_flags & ~XFS_SCRUB_FLAGS_OUT)) {
844+
error = -EINVAL;
845+
goto out_free;
846+
}
847+
848+
trace_xchk_scrubv_item(mp, &head, i, v);
849+
}
850+
851+
/* Run all the scrubbers. */
852+
for (i = 0, v = vectors; i < head.svh_nr; i++, v++) {
853+
struct xfs_scrub_metadata sm = {
854+
.sm_type = v->sv_type,
855+
.sm_flags = v->sv_flags,
856+
.sm_ino = head.svh_ino,
857+
.sm_gen = head.svh_gen,
858+
.sm_agno = head.svh_agno,
859+
};
860+
861+
if (v->sv_type == XFS_SCRUB_TYPE_BARRIER) {
862+
v->sv_ret = xfs_scrubv_check_barrier(mp, vectors, v);
863+
if (v->sv_ret) {
864+
trace_xchk_scrubv_barrier_fail(mp, &head, i, v);
865+
break;
866+
}
867+
868+
continue;
869+
}
870+
871+
v->sv_ret = xfs_scrub_metadata(file, &sm);
872+
v->sv_flags = sm.sm_flags;
873+
874+
trace_xchk_scrubv_outcome(mp, &head, i, v);
875+
876+
if (head.svh_rest_us) {
877+
ktime_t expires;
878+
879+
expires = ktime_add_ns(ktime_get(),
880+
head.svh_rest_us * 1000);
881+
set_current_state(TASK_KILLABLE);
882+
schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
883+
}
884+
885+
if (fatal_signal_pending(current)) {
886+
error = -EINTR;
887+
goto out_free;
888+
}
889+
}
890+
891+
if (copy_to_user(uvectors, vectors, vec_bytes) ||
892+
copy_to_user(uhead, &head, sizeof(head))) {
893+
error = -EFAULT;
894+
goto out_free;
895+
}
896+
897+
out_free:
898+
kfree(vectors);
899+
return error;
900+
}

fs/xfs/scrub/scrub.h

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,49 @@
88

99
struct xfs_scrub;
1010

11+
/* Rate-limiting state for voluntary rescheduling inside a scrub. */
struct xchk_relax {
	unsigned long	next_resched;	/* jiffies deadline for the next cond_resched */
	unsigned int	resched_nr;	/* calls since we last considered relaxing */
	bool		interruptible;	/* can a fatal signal abort the scrub? */
};

/* Yield to the scheduler at most 10x per second. */
#define XCHK_RELAX_NEXT		(jiffies + (HZ / 10))

#define INIT_XCHK_RELAX	\
	(struct xchk_relax){ \
		.next_resched	= XCHK_RELAX_NEXT, \
		.resched_nr	= 0, \
		.interruptible	= true, \
	}
26+
27+
/*
28+
* Relax during a scrub operation and exit if there's a fatal signal pending.
29+
*
30+
* If preemption is disabled, we need to yield to the scheduler every now and
31+
* then so that we don't run afoul of the soft lockup watchdog or RCU stall
32+
* detector. cond_resched calls are somewhat expensive (~5ns) so we want to
33+
* ratelimit this to 10x per second. Amortize the cost of the other checks by
34+
* only doing it once every 100 calls.
35+
*/
36+
static inline int xchk_maybe_relax(struct xchk_relax *widget)
37+
{
38+
/* Amortize the cost of scheduling and checking signals. */
39+
if (likely(++widget->resched_nr < 100))
40+
return 0;
41+
widget->resched_nr = 0;
42+
43+
if (unlikely(widget->next_resched <= jiffies)) {
44+
cond_resched();
45+
widget->next_resched = XCHK_RELAX_NEXT;
46+
}
47+
48+
if (widget->interruptible && fatal_signal_pending(current))
49+
return -EINTR;
50+
51+
return 0;
52+
}
53+
1154
/*
1255
* Standard flags for allocating memory within scrub. NOFS context is
1356
* configured by the process allocation scope. Scrub and repair must be able
@@ -123,6 +166,9 @@ struct xfs_scrub {
123166
*/
124167
unsigned int sick_mask;
125168

169+
/* next time we want to cond_resched() */
170+
struct xchk_relax relax;
171+
126172
/* State tracking for single-AG operations. */
127173
struct xchk_ag sa;
128174
};
@@ -167,6 +213,24 @@ struct xfs_scrub_subord *xchk_scrub_create_subord(struct xfs_scrub *sc,
167213
unsigned int subtype);
168214
void xchk_scrub_free_subord(struct xfs_scrub_subord *sub);
169215

216+
/*
217+
* We /could/ terminate a scrub/repair operation early. If we're not
218+
* in a good place to continue (fatal signal, etc.) then bail out.
219+
* Note that we're careful not to make any judgements about *error.
220+
*/
221+
static inline bool
222+
xchk_should_terminate(
223+
struct xfs_scrub *sc,
224+
int *error)
225+
{
226+
if (xchk_maybe_relax(&sc->relax)) {
227+
if (*error == 0)
228+
*error = -EINTR;
229+
return true;
230+
}
231+
return false;
232+
}
233+
170234
/* Metadata scrubbers */
171235
int xchk_tester(struct xfs_scrub *sc);
172236
int xchk_superblock(struct xfs_scrub *sc);

0 commit comments

Comments (0)