Skip to content

Commit d668fc1

Browse files
author
Chandan Babu R
committed
Merge tag 'big-array-6.6_2023-08-10' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.6-mergeA
xfs: stage repair information in pageable memory

In general, online repair of an indexed record set walks the filesystem looking for records. These records are sorted and bulk-loaded into a new btree. To make this happen without pinning gigabytes of metadata in memory, first create an abstraction ('xfile') of memfd files so that kernel code can access paged memory, and then an array abstraction ('xfarray') based on xfiles so that online repair can create an array of new records without pinning memory.

These two data storage abstractions are critical for repair of space metadata -- the memory used is pageable, which helps us avoid pinning kernel memory and driving OOM problems; and they are byte-accessible enough that we can use them like (very slow and programmatic) memory buffers.

Later patchsets will build on this functionality to provide blob storage and btrees.

Signed-off-by: Darrick J. Wong <[email protected]>
Signed-off-by: Chandan Babu R <[email protected]>

* tag 'big-array-6.6_2023-08-10' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: improve xfarray quicksort pivot
  xfs: cache pages used for xfarray quicksort convergence
  xfs: speed up xfarray sort by sorting xfile page contents directly
  xfs: teach xfile to pass back direct-map pages to caller
  xfs: convert xfarray insertion sort to heapsort using scratchpad memory
  xfs: enable sorting of xfile-backed arrays
  xfs: create a big array data structure
2 parents 81fbc5f + 764018c commit d668fc1

File tree

8 files changed

+1987
-1
lines changed

8 files changed

+1987
-1
lines changed

fs/xfs/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ config XFS_ONLINE_SCRUB
128128
bool "XFS online metadata check support"
129129
default n
130130
depends on XFS_FS
131+
depends on TMPFS && SHMEM
131132
select XFS_DRAIN_INTENTS
132133
help
133134
If you say Y here you will be able to check metadata on a

fs/xfs/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ xfs-y += $(addprefix scrub/, \
164164
rmap.o \
165165
scrub.o \
166166
symlink.o \
167+
xfarray.o \
168+
xfile.o \
167169
)
168170

169171
xfs-$(CONFIG_XFS_RT) += scrub/rtbitmap.o

fs/xfs/scrub/trace.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@
1212
#include "xfs_mount.h"
1313
#include "xfs_inode.h"
1414
#include "xfs_btree.h"
15-
#include "scrub/scrub.h"
1615
#include "xfs_ag.h"
16+
#include "scrub/scrub.h"
17+
#include "scrub/xfile.h"
18+
#include "scrub/xfarray.h"
1719

1820
/* Figure out which block the btree cursor was pointing to. */
1921
static inline xfs_fsblock_t

fs/xfs/scrub/trace.h

Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
#include <linux/tracepoint.h>
1717
#include "xfs_bit.h"
1818

19+
/* Forward declarations of the structures the tracepoints below take by pointer. */
struct xfile;
20+
struct xfarray;
21+
struct xfarray_sortinfo;
22+
1923
/*
2024
* ftrace's __print_symbolic requires that all enum values be wrapped in the
2125
* TRACE_DEFINE_ENUM macro so that the enum value can be encoded in the ftrace
@@ -725,6 +729,262 @@ TRACE_EVENT(xchk_refcount_incorrect,
725729
__entry->seen)
726730
)
727731

732+
TRACE_EVENT(xfile_create,
733+
TP_PROTO(struct xfile *xf),
734+
TP_ARGS(xf),
735+
TP_STRUCT__entry(
736+
__field(dev_t, dev)
737+
__field(unsigned long, ino)
738+
__array(char, pathname, 256)
739+
),
740+
TP_fast_assign(
741+
char pathname[257];
742+
char *path;
743+
744+
__entry->ino = file_inode(xf->file)->i_ino;
745+
memset(pathname, 0, sizeof(pathname));
746+
path = file_path(xf->file, pathname, sizeof(pathname) - 1);
747+
if (IS_ERR(path))
748+
path = "(unknown)";
749+
strncpy(__entry->pathname, path, sizeof(__entry->pathname));
750+
),
751+
TP_printk("xfino 0x%lx path '%s'",
752+
__entry->ino,
753+
__entry->pathname)
754+
);
755+
756+
TRACE_EVENT(xfile_destroy,
757+
TP_PROTO(struct xfile *xf),
758+
TP_ARGS(xf),
759+
TP_STRUCT__entry(
760+
__field(unsigned long, ino)
761+
__field(unsigned long long, bytes)
762+
__field(loff_t, size)
763+
),
764+
TP_fast_assign(
765+
struct xfile_stat statbuf;
766+
int ret;
767+
768+
ret = xfile_stat(xf, &statbuf);
769+
if (!ret) {
770+
__entry->bytes = statbuf.bytes;
771+
__entry->size = statbuf.size;
772+
} else {
773+
__entry->bytes = -1;
774+
__entry->size = -1;
775+
}
776+
__entry->ino = file_inode(xf->file)->i_ino;
777+
),
778+
TP_printk("xfino 0x%lx mem_bytes 0x%llx isize 0x%llx",
779+
__entry->ino,
780+
__entry->bytes,
781+
__entry->size)
782+
);
783+
784+
DECLARE_EVENT_CLASS(xfile_class,
785+
TP_PROTO(struct xfile *xf, loff_t pos, unsigned long long bytecount),
786+
TP_ARGS(xf, pos, bytecount),
787+
TP_STRUCT__entry(
788+
__field(unsigned long, ino)
789+
__field(unsigned long long, bytes_used)
790+
__field(loff_t, pos)
791+
__field(loff_t, size)
792+
__field(unsigned long long, bytecount)
793+
),
794+
TP_fast_assign(
795+
struct xfile_stat statbuf;
796+
int ret;
797+
798+
ret = xfile_stat(xf, &statbuf);
799+
if (!ret) {
800+
__entry->bytes_used = statbuf.bytes;
801+
__entry->size = statbuf.size;
802+
} else {
803+
__entry->bytes_used = -1;
804+
__entry->size = -1;
805+
}
806+
__entry->ino = file_inode(xf->file)->i_ino;
807+
__entry->pos = pos;
808+
__entry->bytecount = bytecount;
809+
),
810+
TP_printk("xfino 0x%lx mem_bytes 0x%llx pos 0x%llx bytecount 0x%llx isize 0x%llx",
811+
__entry->ino,
812+
__entry->bytes_used,
813+
__entry->pos,
814+
__entry->bytecount,
815+
__entry->size)
816+
);
817+
/*
 * Define one tracepoint named @name that uses the xfile_class prototype and
 * record layout, then instantiate it for each traced xfile operation.
 */
#define DEFINE_XFILE_EVENT(name) \
818+
DEFINE_EVENT(xfile_class, name, \
819+
TP_PROTO(struct xfile *xf, loff_t pos, unsigned long long bytecount), \
820+
TP_ARGS(xf, pos, bytecount))
821+
DEFINE_XFILE_EVENT(xfile_pread);
822+
DEFINE_XFILE_EVENT(xfile_pwrite);
823+
DEFINE_XFILE_EVENT(xfile_seek_data);
824+
DEFINE_XFILE_EVENT(xfile_get_page);
825+
DEFINE_XFILE_EVENT(xfile_put_page);
826+
827+
/*
 * Trace the creation of an xfarray.  Records the inode number of the file
 * behind the array's xfile, the array's max_nr, obj_size and obj_size_log
 * fields, and the capacity the caller asked for.
 */
TRACE_EVENT(xfarray_create,
828+
TP_PROTO(struct xfarray *xfa, unsigned long long required_capacity),
829+
TP_ARGS(xfa, required_capacity),
830+
TP_STRUCT__entry(
831+
__field(unsigned long, ino)
832+
__field(uint64_t, max_nr)
833+
__field(size_t, obj_size)
834+
__field(int, obj_size_log)
835+
__field(unsigned long long, required_capacity)
836+
),
837+
TP_fast_assign(
838+
__entry->max_nr = xfa->max_nr;
839+
__entry->obj_size = xfa->obj_size;
840+
__entry->obj_size_log = xfa->obj_size_log;
841+
__entry->ino = file_inode(xfa->xfile->file)->i_ino;
842+
__entry->required_capacity = required_capacity;
843+
),
844+
TP_printk("xfino 0x%lx max_nr %llu reqd_nr %llu objsz %zu objszlog %d",
845+
__entry->ino,
846+
__entry->max_nr,
847+
__entry->required_capacity,
848+
__entry->obj_size,
849+
__entry->obj_size_log)
850+
);
851+
852+
/*
 * Trace an xfarray_isort event.  Records the inode number of the file behind
 * the array being sorted and the lo/hi values passed by the caller.
 */
TRACE_EVENT(xfarray_isort,
853+
TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
854+
TP_ARGS(si, lo, hi),
855+
TP_STRUCT__entry(
856+
__field(unsigned long, ino)
857+
__field(unsigned long long, lo)
858+
__field(unsigned long long, hi)
859+
),
860+
TP_fast_assign(
861+
__entry->ino = file_inode(si->array->xfile->file)->i_ino;
862+
__entry->lo = lo;
863+
__entry->hi = hi;
864+
),
865+
TP_printk("xfino 0x%lx lo %llu hi %llu elts %llu",
866+
__entry->ino,
867+
__entry->lo,
868+
__entry->hi,
869+
/* "elts" is not stored in the record; it is derived as hi - lo at print time. */
__entry->hi - __entry->lo)
870+
);
871+
872+
/*
 * Trace an xfarray_pagesort event.  Records the inode number of the file
 * behind the array being sorted and the lo/hi values passed by the caller.
 */
TRACE_EVENT(xfarray_pagesort,
873+
TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
874+
TP_ARGS(si, lo, hi),
875+
TP_STRUCT__entry(
876+
__field(unsigned long, ino)
877+
__field(unsigned long long, lo)
878+
__field(unsigned long long, hi)
879+
),
880+
TP_fast_assign(
881+
__entry->ino = file_inode(si->array->xfile->file)->i_ino;
882+
__entry->lo = lo;
883+
__entry->hi = hi;
884+
),
885+
TP_printk("xfino 0x%lx lo %llu hi %llu elts %llu",
886+
__entry->ino,
887+
__entry->lo,
888+
__entry->hi,
889+
/* "elts" is not stored in the record; it is derived as hi - lo at print time. */
__entry->hi - __entry->lo)
890+
);
891+
892+
/*
 * Trace an xfarray_qsort event.  Records the inode number of the file behind
 * the array being sorted, the lo/hi values passed by the caller, and the
 * stack_depth and max_stack_depth fields of the sort state.
 */
TRACE_EVENT(xfarray_qsort,
893+
TP_PROTO(struct xfarray_sortinfo *si, uint64_t lo, uint64_t hi),
894+
TP_ARGS(si, lo, hi),
895+
TP_STRUCT__entry(
896+
__field(unsigned long, ino)
897+
__field(unsigned long long, lo)
898+
__field(unsigned long long, hi)
899+
__field(int, stack_depth)
900+
__field(int, max_stack_depth)
901+
),
902+
TP_fast_assign(
903+
__entry->ino = file_inode(si->array->xfile->file)->i_ino;
904+
__entry->lo = lo;
905+
__entry->hi = hi;
906+
__entry->stack_depth = si->stack_depth;
907+
__entry->max_stack_depth = si->max_stack_depth;
908+
),
909+
TP_printk("xfino 0x%lx lo %llu hi %llu elts %llu stack %d/%d",
910+
__entry->ino,
911+
__entry->lo,
912+
__entry->hi,
913+
/* "elts" is not stored in the record; it is derived as hi - lo at print time. */
__entry->hi - __entry->lo,
914+
__entry->stack_depth,
915+
__entry->max_stack_depth)
916+
);
917+
918+
/*
 * Trace an xfarray_sort event.  Records the inode number of the file behind
 * the array, the array's nr and obj_size fields, the max_stack_depth field of
 * the sort state, and the caller-supplied byte count.
 */
TRACE_EVENT(xfarray_sort,
919+
TP_PROTO(struct xfarray_sortinfo *si, size_t bytes),
920+
TP_ARGS(si, bytes),
921+
TP_STRUCT__entry(
922+
__field(unsigned long, ino)
923+
__field(unsigned long long, nr)
924+
__field(size_t, obj_size)
925+
__field(size_t, bytes)
926+
__field(unsigned int, max_stack_depth)
927+
),
928+
TP_fast_assign(
929+
__entry->nr = si->array->nr;
930+
__entry->obj_size = si->array->obj_size;
931+
__entry->ino = file_inode(si->array->xfile->file)->i_ino;
932+
__entry->bytes = bytes;
933+
__entry->max_stack_depth = si->max_stack_depth;
934+
),
935+
TP_printk("xfino 0x%lx nr %llu objsz %zu stack %u bytes %zu",
936+
__entry->ino,
937+
__entry->nr,
938+
__entry->obj_size,
939+
__entry->max_stack_depth,
940+
__entry->bytes)
941+
);
942+
943+
/*
 * Trace an xfarray_sort_stats event.  Records the inode number of the file
 * behind the array, the max_stack_depth and max_stack_used fields of the sort
 * state, and the caller-supplied error code.  When DEBUG is defined the
 * record also carries the loads, stores, compares and heapsorts counters from
 * the sort state, and the format string grows to match.
 */
TRACE_EVENT(xfarray_sort_stats,
944+
TP_PROTO(struct xfarray_sortinfo *si, int error),
945+
TP_ARGS(si, error),
946+
TP_STRUCT__entry(
947+
__field(unsigned long, ino)
948+
#ifdef DEBUG
949+
__field(unsigned long long, loads)
950+
__field(unsigned long long, stores)
951+
__field(unsigned long long, compares)
952+
__field(unsigned long long, heapsorts)
953+
#endif
954+
__field(unsigned int, max_stack_depth)
955+
__field(unsigned int, max_stack_used)
956+
__field(int, error)
957+
),
958+
TP_fast_assign(
959+
__entry->ino = file_inode(si->array->xfile->file)->i_ino;
960+
#ifdef DEBUG
961+
__entry->loads = si->loads;
962+
__entry->stores = si->stores;
963+
__entry->compares = si->compares;
964+
__entry->heapsorts = si->heapsorts;
965+
#endif
966+
__entry->max_stack_depth = si->max_stack_depth;
967+
__entry->max_stack_used = si->max_stack_used;
968+
__entry->error = error;
969+
),
970+
TP_printk(
971+
#ifdef DEBUG
972+
"xfino 0x%lx loads %llu stores %llu compares %llu heapsorts %llu stack_depth %u/%u error %d",
973+
#else
974+
"xfino 0x%lx stack_depth %u/%u error %d",
975+
#endif
976+
__entry->ino,
977+
#ifdef DEBUG
978+
__entry->loads,
979+
__entry->stores,
980+
__entry->compares,
981+
__entry->heapsorts,
982+
#endif
983+
/* The "stack_depth %u/%u" pair is printed as max_stack_used/max_stack_depth. */
__entry->max_stack_used,
984+
__entry->max_stack_depth,
985+
__entry->error)
986+
);
987+
728988
/* repair tracepoints */
729989
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
730990

0 commit comments

Comments
 (0)