Skip to content

Commit 764018c

Browse files
author
Darrick J. Wong
committed
xfs: improve xfarray quicksort pivot
Now that we have the means to do insertion sorts of small in-memory subsets of an xfarray, use it to improve the quicksort pivot algorithm by reading 9 records into memory and finding the median of that. This should prevent bad partitioning when a[lo] and a[hi] end up next to each other in the final sort, which can happen when sorting for cntbt repair when the free space is extremely fragmented (e.g. generic/176). This doesn't speed up the average quicksort run by much, but it will (hopefully) avoid the quadratic time collapse for which quicksort is famous. Signed-off-by: Darrick J. Wong <[email protected]> Reviewed-by: Kent Overstreet <[email protected]> Reviewed-by: Dave Chinner <[email protected]>
1 parent cf36f4f commit 764018c

File tree

2 files changed

+148
-69
lines changed

2 files changed

+148
-69
lines changed

fs/xfs/scrub/xfarray.c

Lines changed: 134 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,14 @@ static inline xfarray_idx_t *xfarray_sortinfo_hi(struct xfarray_sortinfo *si)
427427
return xfarray_sortinfo_lo(si) + si->max_stack_depth;
428428
}
429429

430+
/* Size of each element in the quicksort pivot array. */
431+
static inline size_t
432+
xfarray_pivot_rec_sz(
433+
struct xfarray *array)
434+
{
435+
return round_up(array->obj_size, 8) + sizeof(xfarray_idx_t);
436+
}
437+
430438
/* Allocate memory to handle the sort. */
431439
static inline int
432440
xfarray_sortinfo_alloc(
@@ -437,8 +445,16 @@ xfarray_sortinfo_alloc(
437445
{
438446
struct xfarray_sortinfo *si;
439447
size_t nr_bytes = sizeof(struct xfarray_sortinfo);
448+
size_t pivot_rec_sz = xfarray_pivot_rec_sz(array);
440449
int max_stack_depth;
441450

451+
/*
452+
* The median-of-nine pivot algorithm doesn't work if a subset has
453+
* fewer than 9 items. Make sure the in-memory sort will always take
454+
* over for subsets where this wouldn't be the case.
455+
*/
456+
BUILD_BUG_ON(XFARRAY_QSORT_PIVOT_NR >= XFARRAY_ISORT_NR);
457+
442458
/*
443459
* Tail-call recursion during the partitioning phase means that
444460
* quicksort will never recurse more than log2(nr) times. We need one
@@ -453,8 +469,10 @@ xfarray_sortinfo_alloc(
453469
/* Each level of quicksort uses a lo and a hi index */
454470
nr_bytes += max_stack_depth * sizeof(xfarray_idx_t) * 2;
455471

456-
/* Scratchpad for in-memory sort, or one record for the pivot */
457-
nr_bytes += (XFARRAY_ISORT_NR * array->obj_size);
472+
/* Scratchpad for in-memory sort, or finding the pivot */
473+
nr_bytes += max_t(size_t,
474+
(XFARRAY_QSORT_PIVOT_NR + 1) * pivot_rec_sz,
475+
XFARRAY_ISORT_NR * array->obj_size);
458476

459477
si = kvzalloc(nr_bytes, XCHK_GFP_FLAGS);
460478
if (!si)
@@ -632,91 +650,143 @@ static inline void *xfarray_sortinfo_pivot(struct xfarray_sortinfo *si)
632650
return xfarray_sortinfo_hi(si) + si->max_stack_depth;
633651
}
634652

653+
/* Return a pointer to the start of the pivot array. */
654+
static inline void *
655+
xfarray_sortinfo_pivot_array(
656+
struct xfarray_sortinfo *si)
657+
{
658+
return xfarray_sortinfo_pivot(si) + si->array->obj_size;
659+
}
660+
661+
/* The xfarray record is stored at the start of each pivot array element. */
662+
static inline void *
663+
xfarray_pivot_array_rec(
664+
void *pa,
665+
size_t pa_recsz,
666+
unsigned int pa_idx)
667+
{
668+
return pa + (pa_recsz * pa_idx);
669+
}
670+
671+
/* The xfarray index is stored at the end of each pivot array element. */
672+
static inline xfarray_idx_t *
673+
xfarray_pivot_array_idx(
674+
void *pa,
675+
size_t pa_recsz,
676+
unsigned int pa_idx)
677+
{
678+
return xfarray_pivot_array_rec(pa, pa_recsz, pa_idx + 1) -
679+
sizeof(xfarray_idx_t);
680+
}
681+
635682
/*
636683
* Find a pivot value for quicksort partitioning, swap it with a[lo], and save
637684
* the cached pivot record for the next step.
638685
*
639-
* Select the median value from a[lo], a[mid], and a[hi]. Put the median in
640-
* a[lo], the lowest in a[mid], and the highest in a[hi]. Using the median of
641-
* the three reduces the chances that we pick the worst case pivot value, since
642-
* it's likely that our array values are nearly sorted.
686+
* Load evenly-spaced records within the given range into memory, sort them,
687+
* and choose the pivot from the median record. Using multiple points will
688+
* improve the quality of the pivot selection, and hopefully avoid the worst
689+
* quicksort behavior, since our array values are nearly always evenly sorted.
643690
*/
644691
STATIC int
645692
xfarray_qsort_pivot(
646693
struct xfarray_sortinfo *si,
647694
xfarray_idx_t lo,
648695
xfarray_idx_t hi)
649696
{
650-
void *a = xfarray_sortinfo_pivot(si);
651-
void *b = xfarray_scratch(si->array);
652-
xfarray_idx_t mid = lo + ((hi - lo) / 2);
697+
void *pivot = xfarray_sortinfo_pivot(si);
698+
void *parray = xfarray_sortinfo_pivot_array(si);
699+
void *recp;
700+
xfarray_idx_t *idxp;
701+
xfarray_idx_t step = (hi - lo) / (XFARRAY_QSORT_PIVOT_NR - 1);
702+
size_t pivot_rec_sz = xfarray_pivot_rec_sz(si->array);
703+
int i, j;
653704
int error;
654705

655-
/* if a[mid] < a[lo], swap a[mid] and a[lo]. */
656-
error = xfarray_sort_load(si, mid, a);
657-
if (error)
658-
return error;
659-
error = xfarray_sort_load(si, lo, b);
660-
if (error)
661-
return error;
662-
if (xfarray_sort_cmp(si, a, b) < 0) {
663-
error = xfarray_sort_store(si, lo, a);
664-
if (error)
665-
return error;
666-
error = xfarray_sort_store(si, mid, b);
667-
if (error)
668-
return error;
669-
}
706+
ASSERT(step > 0);
670707

671-
/* if a[hi] < a[mid], swap a[mid] and a[hi]. */
672-
error = xfarray_sort_load(si, hi, a);
673-
if (error)
674-
return error;
675-
error = xfarray_sort_load(si, mid, b);
676-
if (error)
677-
return error;
678-
if (xfarray_sort_cmp(si, a, b) < 0) {
679-
error = xfarray_sort_store(si, mid, a);
680-
if (error)
681-
return error;
682-
error = xfarray_sort_store(si, hi, b);
683-
if (error)
684-
return error;
685-
} else {
686-
goto move_front;
708+
/*
709+
* Load the xfarray indexes of the records we intend to sample into the
710+
* pivot array.
711+
*/
712+
idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz, 0);
713+
*idxp = lo;
714+
for (i = 1; i < XFARRAY_QSORT_PIVOT_NR - 1; i++) {
715+
idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz, i);
716+
*idxp = lo + (i * step);
687717
}
718+
idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz,
719+
XFARRAY_QSORT_PIVOT_NR - 1);
720+
*idxp = hi;
688721

689-
/* if a[mid] < a[lo], swap a[mid] and a[lo]. */
690-
error = xfarray_sort_load(si, mid, a);
691-
if (error)
692-
return error;
693-
error = xfarray_sort_load(si, lo, b);
694-
if (error)
695-
return error;
696-
if (xfarray_sort_cmp(si, a, b) < 0) {
697-
error = xfarray_sort_store(si, lo, a);
698-
if (error)
699-
return error;
700-
error = xfarray_sort_store(si, mid, b);
722+
/* Load the selected xfarray records into the pivot array. */
723+
for (i = 0; i < XFARRAY_QSORT_PIVOT_NR; i++) {
724+
xfarray_idx_t idx;
725+
726+
recp = xfarray_pivot_array_rec(parray, pivot_rec_sz, i);
727+
idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz, i);
728+
729+
/* No unset records; load directly into the array. */
730+
if (likely(si->array->unset_slots == 0)) {
731+
error = xfarray_sort_load(si, *idxp, recp);
732+
if (error)
733+
return error;
734+
continue;
735+
}
736+
737+
/*
738+
* Load non-null records into the scratchpad without changing
739+
* the xfarray_idx_t in the pivot array.
740+
*/
741+
idx = *idxp;
742+
xfarray_sort_bump_loads(si);
743+
error = xfarray_load_next(si->array, &idx, recp);
701744
if (error)
702745
return error;
703746
}
704747

705-
move_front:
748+
xfarray_sort_bump_heapsorts(si);
749+
sort(parray, XFARRAY_QSORT_PIVOT_NR, pivot_rec_sz, si->cmp_fn, NULL);
750+
706751
/*
707-
* Move our selected pivot to a[lo]. Recall that a == si->pivot, so
708-
* this leaves us with the pivot cached in the sortinfo structure.
752+
* We sorted the pivot array records (which includes the xfarray
753+
* indices) in xfarray record order. The median element of the pivot
754+
* array contains the xfarray record that we will use as the pivot.
755+
* Copy that xfarray record to the designated space.
709756
*/
710-
error = xfarray_sort_load(si, lo, b);
711-
if (error)
712-
return error;
713-
error = xfarray_sort_load(si, mid, a);
714-
if (error)
715-
return error;
716-
error = xfarray_sort_store(si, mid, b);
757+
recp = xfarray_pivot_array_rec(parray, pivot_rec_sz,
758+
XFARRAY_QSORT_PIVOT_NR / 2);
759+
memcpy(pivot, recp, si->array->obj_size);
760+
761+
/* If the pivot record we chose was already in a[lo] then we're done. */
762+
idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz,
763+
XFARRAY_QSORT_PIVOT_NR / 2);
764+
if (*idxp == lo)
765+
return 0;
766+
767+
/*
768+
* Find the cached copy of a[lo] in the pivot array so that we can swap
769+
* a[lo] and a[pivot].
770+
*/
771+
for (i = 0, j = -1; i < XFARRAY_QSORT_PIVOT_NR; i++) {
772+
idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz, i);
773+
if (*idxp == lo)
774+
j = i;
775+
}
776+
if (j < 0) {
777+
ASSERT(j >= 0);
778+
return -EFSCORRUPTED;
779+
}
780+
781+
/* Swap a[lo] and a[pivot]. */
782+
error = xfarray_sort_store(si, lo, pivot);
717783
if (error)
718784
return error;
719-
return xfarray_sort_store(si, lo, a);
785+
786+
recp = xfarray_pivot_array_rec(parray, pivot_rec_sz, j);
787+
idxp = xfarray_pivot_array_idx(parray, pivot_rec_sz,
788+
XFARRAY_QSORT_PIVOT_NR / 2);
789+
return xfarray_sort_store(si, *idxp, recp);
720790
}
721791

722792
/*
@@ -828,7 +898,7 @@ xfarray_sort_load_cached(
828898
* particularly expensive in the kernel.
829899
*
830900
* 2. For arrays with records in arbitrary or user-controlled order, choose the
831-
* pivot element using a median-of-three decision tree. This reduces the
901+
* pivot element as the median of nine sampled records. This reduces the
832902
* probability of selecting a bad pivot value which causes worst case
833903
* behavior (i.e. partition sizes of 1).
834904
*

fs/xfs/scrub/xfarray.h

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ typedef cmp_func_t xfarray_cmp_fn;
6262
#define XFARRAY_ISORT_SHIFT (4)
6363
#define XFARRAY_ISORT_NR (1U << XFARRAY_ISORT_SHIFT)
6464

65+
/* Evaluate this many points to find the qsort pivot. */
66+
#define XFARRAY_QSORT_PIVOT_NR (9)
67+
6568
struct xfarray_sortinfo {
6669
struct xfarray *array;
6770

@@ -91,7 +94,6 @@ struct xfarray_sortinfo {
9194
uint64_t compares;
9295
uint64_t heapsorts;
9396
#endif
94-
9597
/*
9698
* Extra bytes are allocated beyond the end of the structure to store
9799
* quicksort information. C does not permit multiple VLAs per struct,
@@ -114,11 +116,18 @@ struct xfarray_sortinfo {
114116
* xfarray_rec_t scratch[ISORT_NR];
115117
*
116118
* Otherwise, we want to partition the array's records around a pivot.
117-
* We store the chosen pivot record here and use the xfarray scratchpad
118-
* to rearrange the array around the pivot:
119-
*
120-
* xfarray_rec_t pivot;
119+
* We store the chosen pivot record at the start of the scratchpad area
120+
* and use the rest to sample some records to estimate the median.
121+
* The format of the qsort_pivot array enables us to use the kernel
122+
* heapsort function to place the median value in the middle.
121123
*
124+
* struct {
125+
* xfarray_rec_t pivot;
126+
* struct {
127+
* xfarray_rec_t rec; (rounded up to 8 bytes)
128+
* xfarray_idx_t idx;
129+
* } qsort_pivot[QSORT_PIVOT_NR];
130+
* };
122131
* }
123132
*/
124133
};

0 commit comments

Comments
 (0)