Skip to content

Commit c77b375

Browse files
author
Darrick J. Wong
committed
xfs: introduce vectored scrub mode
Introduce a variant on XFS_SCRUB_METADATA that allows for a vectored mode. The caller specifies the principal metadata object that they want to scrub (allocation group, inode, etc.) once, followed by an array of scrub types they want called on that object. The kernel runs the scrub operations and writes the output flags and errno code to the corresponding array element.

A new pseudo scrub type BARRIER is introduced to force the kernel to return to userspace if any corruptions have been found when scrubbing the previous scrub types in the array. This enables userspace to schedule, for example, the sequence:

1. data fork
2. barrier
3. directory

If the data fork scrub is clean, then the kernel will perform the directory scrub. If not, the barrier in 2 will exit back to userspace.

The alternative would have been an interface where userspace passes a pointer to an empty buffer, and the kernel formats that with xfs_scrub_vecs that tell userspace what it scrubbed and what the outcome was. With that the kernel would have to communicate that the buffer needed to have been at least X size, even though for our cases XFS_SCRUB_TYPE_NR + 2 would always be enough.

Compared to that, this design keeps all the dependency policy and ordering logic in userspace where it already resides instead of duplicating it in the kernel. The downside of that is that it needs the barrier logic.

When running fstests in "rebuild all metadata after each test" mode, I observed a 10% reduction in runtime due to fewer transitions across the system call boundary.

Signed-off-by: Darrick J. Wong <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
1 parent be7cf17 commit c77b375

File tree

5 files changed

+264
-1
lines changed

5 files changed

+264
-1
lines changed

fs/xfs/libxfs/xfs_fs.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,15 @@ struct xfs_scrub_metadata {
725725
/* Number of scrub subcommands. */
726726
#define XFS_SCRUB_TYPE_NR 29
727727

728+
/*
729+
* This special type code only applies to the vectored scrub implementation.
730+
*
731+
* If any of the previous scrub vectors recorded runtime errors or have
732+
* sv_flags bits set that match the OFLAG bits in the barrier vector's
733+
* sv_flags, set the barrier's sv_ret to -ECANCELED and return to userspace.
734+
*/
735+
#define XFS_SCRUB_TYPE_BARRIER (0xFFFFFFFF)
736+
728737
/* i: Repair this metadata. */
729738
#define XFS_SCRUB_IFLAG_REPAIR (1u << 0)
730739

@@ -769,6 +778,29 @@ struct xfs_scrub_metadata {
769778
XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED)
770779
#define XFS_SCRUB_FLAGS_ALL (XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)
771780

781+
/* Vectored scrub calls to reduce the number of kernel transitions. */
782+
783+
/*
 * One element of the vector array passed to XFS_IOC_SCRUBV_METADATA.
 *
 * Each element names one scrub type to run against the object described by
 * the vector head.  sv_flags is copied into the scrub request and then
 * overwritten with the flags the scrubber reports; sv_ret receives the
 * scrubber's return value.  An element whose sv_type is
 * XFS_SCRUB_TYPE_BARRIER runs no scrubber: its sv_flags may only contain
 * XFS_SCRUB_FLAGS_OUT bits, which select the output flags of earlier
 * elements that should trip the barrier.
 */
struct xfs_scrub_vec {
784+
__u32 sv_type; /* XFS_SCRUB_TYPE_* */
785+
__u32 sv_flags; /* XFS_SCRUB_FLAGS_* */
786+
__s32 sv_ret; /* 0 or a negative error code */
787+
/* A nonzero value makes the whole ioctl fail with -EINVAL. */
__u32 sv_reserved; /* must be zero */
788+
};
789+
790+
/* Vectored metadata scrub control structure. */
791+
/*
 * Argument structure for XFS_IOC_SCRUBV_METADATA.
 *
 * svh_ino, svh_gen and svh_agno identify the metadata object and are handed
 * unchanged to every scrub request built from the vector array.  The array
 * itself lives in a separate userspace buffer referenced by svh_vectors.
 */
struct xfs_scrub_vec_head {
792+
__u64 svh_ino; /* inode number. */
793+
__u32 svh_gen; /* inode generation. */
794+
__u32 svh_agno; /* ag number. */
795+
/* Bits outside XFS_SCRUB_VEC_FLAGS_ALL are rejected with -EINVAL. */
__u32 svh_flags; /* XFS_SCRUB_VEC_FLAGS_* */
796+
/* Microseconds to sleep after each scrub item; zero disables the rest. */
__u16 svh_rest_us; /* wait this much time between vector items */
797+
/* Zero makes the ioctl return 0 immediately without touching svh_vectors. */
__u16 svh_nr; /* number of svh_vectors */
798+
__u64 svh_reserved; /* must be zero */
799+
/* Userspace address carried as a 64-bit integer. */
__u64 svh_vectors; /* pointer to buffer of xfs_scrub_vec */
800+
};
801+
802+
#define XFS_SCRUB_VEC_FLAGS_ALL (0)
803+
772804
/*
773805
* ioctl limits
774806
*/
@@ -928,6 +960,7 @@ struct xfs_getparents_by_handle {
928960
#define XFS_IOC_AG_GEOMETRY _IOWR('X', 61, struct xfs_ag_geometry)
929961
#define XFS_IOC_GETPARENTS _IOWR('X', 62, struct xfs_getparents)
930962
#define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle)
963+
#define XFS_IOC_SCRUBV_METADATA _IOWR('X', 64, struct xfs_scrub_vec_head)
931964

932965
/*
933966
* ioctl commands that replace IRIX syssgi()'s

fs/xfs/scrub/scrub.c

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "xfs_exchmaps.h"
2222
#include "xfs_dir2.h"
2323
#include "xfs_parent.h"
24+
#include "xfs_icache.h"
2425
#include "scrub/scrub.h"
2526
#include "scrub/common.h"
2627
#include "scrub/trace.h"
@@ -749,3 +750,151 @@ xfs_ioc_scrub_metadata(
749750

750751
return 0;
751752
}
753+
754+
/* Decide if there have been any scrub failures up to this point. */
755+
static inline int
756+
xfs_scrubv_check_barrier(
757+
struct xfs_mount *mp,
758+
const struct xfs_scrub_vec *vectors,
759+
const struct xfs_scrub_vec *stop_vec)
760+
{
761+
const struct xfs_scrub_vec *v;
762+
__u32 failmask;
763+
764+
failmask = stop_vec->sv_flags & XFS_SCRUB_FLAGS_OUT;
765+
766+
for (v = vectors; v < stop_vec; v++) {
767+
if (v->sv_type == XFS_SCRUB_TYPE_BARRIER)
768+
continue;
769+
770+
/*
771+
* Runtime errors count as a previous failure, except the ones
772+
* used to ask userspace to retry.
773+
*/
774+
switch (v->sv_ret) {
775+
case -EBUSY:
776+
case -ENOENT:
777+
case -EUSERS:
778+
case 0:
779+
break;
780+
default:
781+
return -ECANCELED;
782+
}
783+
784+
/*
785+
* If any of the out-flags on the scrub vector match the mask
786+
* that was set on the barrier vector, that's a previous fail.
787+
*/
788+
if (v->sv_flags & failmask)
789+
return -ECANCELED;
790+
}
791+
792+
return 0;
793+
}
794+
795+
/* Vectored scrub implementation to reduce ioctl calls. */
796+
int
797+
xfs_ioc_scrubv_metadata(
798+
struct file *file,
799+
void __user *arg)
800+
{
801+
struct xfs_scrub_vec_head head;
802+
struct xfs_scrub_vec_head __user *uhead = arg;
803+
struct xfs_scrub_vec *vectors;
804+
struct xfs_scrub_vec __user *uvectors;
805+
struct xfs_inode *ip_in = XFS_I(file_inode(file));
806+
struct xfs_mount *mp = ip_in->i_mount;
807+
struct xfs_scrub_vec *v;
808+
size_t vec_bytes;
809+
unsigned int i;
810+
int error = 0;
811+
812+
if (!capable(CAP_SYS_ADMIN))
813+
return -EPERM;
814+
815+
if (copy_from_user(&head, uhead, sizeof(head)))
816+
return -EFAULT;
817+
818+
if (head.svh_reserved)
819+
return -EINVAL;
820+
if (head.svh_flags & ~XFS_SCRUB_VEC_FLAGS_ALL)
821+
return -EINVAL;
822+
if (head.svh_nr == 0)
823+
return 0;
824+
825+
vec_bytes = array_size(head.svh_nr, sizeof(struct xfs_scrub_vec));
826+
if (vec_bytes > PAGE_SIZE)
827+
return -ENOMEM;
828+
829+
uvectors = (void __user *)(uintptr_t)head.svh_vectors;
830+
vectors = memdup_user(uvectors, vec_bytes);
831+
if (IS_ERR(vectors))
832+
return PTR_ERR(vectors);
833+
834+
trace_xchk_scrubv_start(ip_in, &head);
835+
836+
for (i = 0, v = vectors; i < head.svh_nr; i++, v++) {
837+
if (v->sv_reserved) {
838+
error = -EINVAL;
839+
goto out_free;
840+
}
841+
842+
if (v->sv_type == XFS_SCRUB_TYPE_BARRIER &&
843+
(v->sv_flags & ~XFS_SCRUB_FLAGS_OUT)) {
844+
error = -EINVAL;
845+
goto out_free;
846+
}
847+
848+
trace_xchk_scrubv_item(mp, &head, i, v);
849+
}
850+
851+
/* Run all the scrubbers. */
852+
for (i = 0, v = vectors; i < head.svh_nr; i++, v++) {
853+
struct xfs_scrub_metadata sm = {
854+
.sm_type = v->sv_type,
855+
.sm_flags = v->sv_flags,
856+
.sm_ino = head.svh_ino,
857+
.sm_gen = head.svh_gen,
858+
.sm_agno = head.svh_agno,
859+
};
860+
861+
if (v->sv_type == XFS_SCRUB_TYPE_BARRIER) {
862+
v->sv_ret = xfs_scrubv_check_barrier(mp, vectors, v);
863+
if (v->sv_ret) {
864+
trace_xchk_scrubv_barrier_fail(mp, &head, i, v);
865+
break;
866+
}
867+
868+
continue;
869+
}
870+
871+
v->sv_ret = xfs_scrub_metadata(file, &sm);
872+
v->sv_flags = sm.sm_flags;
873+
874+
trace_xchk_scrubv_outcome(mp, &head, i, v);
875+
876+
if (head.svh_rest_us) {
877+
ktime_t expires;
878+
879+
expires = ktime_add_ns(ktime_get(),
880+
head.svh_rest_us * 1000);
881+
set_current_state(TASK_KILLABLE);
882+
schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
883+
}
884+
885+
if (fatal_signal_pending(current)) {
886+
error = -EINTR;
887+
goto out_free;
888+
}
889+
}
890+
891+
if (copy_to_user(uvectors, vectors, vec_bytes) ||
892+
copy_to_user(uhead, &head, sizeof(head))) {
893+
error = -EFAULT;
894+
goto out_free;
895+
}
896+
897+
out_free:
898+
kfree(vectors);
899+
return error;
900+
}

fs/xfs/scrub/trace.h

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_QUOTACHECK);
6969
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_NLINKS);
7070
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY);
7171
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_DIRTREE);
72+
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER);
7273

7374
#define XFS_SCRUB_TYPE_STRINGS \
7475
{ XFS_SCRUB_TYPE_PROBE, "probe" }, \
@@ -99,7 +100,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_DIRTREE);
99100
{ XFS_SCRUB_TYPE_QUOTACHECK, "quotacheck" }, \
100101
{ XFS_SCRUB_TYPE_NLINKS, "nlinks" }, \
101102
{ XFS_SCRUB_TYPE_HEALTHY, "healthy" }, \
102-
{ XFS_SCRUB_TYPE_DIRTREE, "dirtree" }
103+
{ XFS_SCRUB_TYPE_DIRTREE, "dirtree" }, \
104+
{ XFS_SCRUB_TYPE_BARRIER, "barrier" }
103105

104106
#define XFS_SCRUB_FLAG_STRINGS \
105107
{ XFS_SCRUB_IFLAG_REPAIR, "repair" }, \
@@ -208,6 +210,81 @@ DEFINE_EVENT(xchk_fsgate_class, name, \
208210
DEFINE_SCRUB_FSHOOK_EVENT(xchk_fsgates_enable);
209211
DEFINE_SCRUB_FSHOOK_EVENT(xchk_fsgates_disable);
210212

213+
/*
 * Tracepoint class that records the header of a vectored scrub call.
 *
 * "ino" is the inode the ioctl was issued on (ip->i_ino); "inum", "gen" and
 * "agno" are the target object fields taken from the vector head.
 */
DECLARE_EVENT_CLASS(xchk_vector_head_class,
214+
TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_vec_head *vhead),
215+
TP_ARGS(ip, vhead),
216+
TP_STRUCT__entry(
217+
__field(dev_t, dev)
218+
__field(xfs_ino_t, ino)
219+
__field(xfs_agnumber_t, agno)
220+
__field(xfs_ino_t, inum)
221+
__field(unsigned int, gen)
222+
__field(unsigned int, flags)
223+
__field(unsigned short, rest_us)
224+
__field(unsigned short, nr_vecs)
225+
),
226+
TP_fast_assign(
227+
__entry->dev = ip->i_mount->m_super->s_dev;
228+
__entry->ino = ip->i_ino;
229+
__entry->agno = vhead->svh_agno;
230+
__entry->inum = vhead->svh_ino;
231+
__entry->gen = vhead->svh_gen;
232+
__entry->flags = vhead->svh_flags;
233+
__entry->rest_us = vhead->svh_rest_us;
234+
__entry->nr_vecs = vhead->svh_nr;
235+
),
236+
TP_printk("dev %d:%d ino 0x%llx agno 0x%x inum 0x%llx gen 0x%x flags 0x%x rest_us %u nr_vecs %u",
237+
MAJOR(__entry->dev), MINOR(__entry->dev),
238+
__entry->ino,
239+
__entry->agno,
240+
__entry->inum,
241+
__entry->gen,
242+
__entry->flags,
243+
__entry->rest_us,
244+
__entry->nr_vecs)
245+
)
/* Stamp out one event of the vector head class under the given name. */
246+
#define DEFINE_SCRUBV_HEAD_EVENT(name) \
247+
DEFINE_EVENT(xchk_vector_head_class, name, \
248+
TP_PROTO(struct xfs_inode *ip, struct xfs_scrub_vec_head *vhead), \
249+
TP_ARGS(ip, vhead))
250+
251+
/* Emitted once per ioctl call, after the vector array has been copied in. */
DEFINE_SCRUBV_HEAD_EVENT(xchk_scrubv_start);
252+
253+
/*
 * Tracepoint class that records a single scrub vector: its index in the
 * array plus its type, flags and return value at the time of the event.
 * The vector head is accepted as an argument but none of its fields are
 * stored in the trace record.
 */
DECLARE_EVENT_CLASS(xchk_vector_class,
254+
TP_PROTO(struct xfs_mount *mp, struct xfs_scrub_vec_head *vhead,
255+
unsigned int vec_nr, struct xfs_scrub_vec *v),
256+
TP_ARGS(mp, vhead, vec_nr, v),
257+
TP_STRUCT__entry(
258+
__field(dev_t, dev)
259+
__field(unsigned int, vec_nr)
260+
__field(unsigned int, vec_type)
261+
__field(unsigned int, vec_flags)
262+
__field(int, vec_ret)
263+
),
264+
TP_fast_assign(
265+
__entry->dev = mp->m_super->s_dev;
266+
__entry->vec_nr = vec_nr;
267+
__entry->vec_type = v->sv_type;
268+
__entry->vec_flags = v->sv_flags;
269+
__entry->vec_ret = v->sv_ret;
270+
),
271+
TP_printk("dev %d:%d vec[%u] type %s flags %s ret %d",
272+
MAJOR(__entry->dev), MINOR(__entry->dev),
273+
__entry->vec_nr,
274+
__print_symbolic(__entry->vec_type, XFS_SCRUB_TYPE_STRINGS),
275+
__print_flags(__entry->vec_flags, "|", XFS_SCRUB_FLAG_STRINGS),
276+
__entry->vec_ret)
277+
)
/* Stamp out one event of the per-vector class under the given name. */
278+
#define DEFINE_SCRUBV_EVENT(name) \
279+
DEFINE_EVENT(xchk_vector_class, name, \
280+
TP_PROTO(struct xfs_mount *mp, struct xfs_scrub_vec_head *vhead, \
281+
unsigned int vec_nr, struct xfs_scrub_vec *v), \
282+
TP_ARGS(mp, vhead, vec_nr, v))
283+
284+
/* A barrier vector found an earlier failure and stopped the run. */
DEFINE_SCRUBV_EVENT(xchk_scrubv_barrier_fail);
285+
/* A vector passed input validation, before any scrubber has run. */
DEFINE_SCRUBV_EVENT(xchk_scrubv_item);
286+
/* A scrubber finished; sv_ret and sv_flags hold its results. */
DEFINE_SCRUBV_EVENT(xchk_scrubv_outcome);
287+
211288
TRACE_EVENT(xchk_op_error,
212289
TP_PROTO(struct xfs_scrub *sc, xfs_agnumber_t agno,
213290
xfs_agblock_t bno, int error, void *ret_ip),

fs/xfs/scrub/xfs_scrub.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88

99
#ifndef CONFIG_XFS_ONLINE_SCRUB
1010
# define xfs_ioc_scrub_metadata(f, a) (-ENOTTY)
11+
# define xfs_ioc_scrubv_metadata(f, a) (-ENOTTY)
1112
#else
1213
int xfs_ioc_scrub_metadata(struct file *file, void __user *arg);
14+
int xfs_ioc_scrubv_metadata(struct file *file, void __user *arg);
1315
#endif /* CONFIG_XFS_ONLINE_SCRUB */
1416

1517
#endif /* __XFS_SCRUB_H__ */

fs/xfs/xfs_ioctl.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1413,6 +1413,8 @@ xfs_file_ioctl(
14131413
case FS_IOC_GETFSMAP:
14141414
return xfs_ioc_getfsmap(ip, arg);
14151415

1416+
case XFS_IOC_SCRUBV_METADATA:
1417+
return xfs_ioc_scrubv_metadata(filp, arg);
14161418
case XFS_IOC_SCRUB_METADATA:
14171419
return xfs_ioc_scrub_metadata(filp, arg);
14181420

0 commit comments

Comments
 (0)